diff --git "a/measurement.json" "b/measurement.json" --- "a/measurement.json" +++ "b/measurement.json" @@ -1,439 +1,14 @@ { - "measurement": [ - { - "key": "model.layers.0.self_attn.q_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.011630737222731113, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.010072254575788975, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.005012615118175745, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.005310783162713051, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.005310649052262306, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.0021244988311082125, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.011281419545412064, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.009994505904614925, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.0055008986964821815, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.0048739793710410595, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.005121907219290733, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.005336061120033264, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.004872351419180632, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.002883875509724021, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.002191320527344942, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.0028344537131488323, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.001967634540051222, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.0016501841600984335, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.001911453204229474, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.0015986047219485044, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.001864988123998046, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.0019112959271296859, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.0014767907559871674, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.001577819581143558, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.011630737222731113, - "qparams": { - "group_size": 32, + "measurement": { + "model.layers.0.self_attn": [ + { + "accuracy": 0.8824015855789185, + "total_bits": 89141248, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, "bits": [ 3, 2 @@ -443,15 +18,12 @@ 0.95 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.011630737222731113, - "qparams": { - "group_size": 32, + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, "bits": [ 3, 2 @@ -461,443 +33,12 @@ 0.95 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.0.self_attn.k_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.011738219298422337, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.010142585262656212, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.004951257724314928, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.0052138702012598515, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.005213356576859951, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.0019807738717645407, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.011507593095302582, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.0100327767431736, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.005403483752161264, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.004741148091852665, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.004964396357536316, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.005180669948458672, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.0047391620464622974, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.002746971556916833, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.0019927809480577707, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.0027131896931678057, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.0017253898549824953, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.0013743627350777388, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.0016573881730437279, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.0013073759619146585, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.0016605271957814693, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.001657029613852501, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.001232474809512496, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.0012789323227480054, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.011738219298422337, - "qparams": { - "group_size": 32, + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, "bits": [ 3, 2 @@ -907,15 +48,12 @@ 0.95 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.011738219298422337, - "qparams": { - "group_size": 32, + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, "bits": [ 3, 2 @@ -926,4145 +64,127 @@ ], "scale_bits": 4 } - } - }, - { - "key": "model.layers.0.self_attn.v_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.11990325152873993, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.0728302076458931, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.04503817856311798, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.05186893790960312, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.05181986838579178, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.024772368371486664, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.08125360310077667, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.06685543805360794, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.056394390761852264, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.03260814771056175, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.03908336162567139, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.04421577602624893, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.0322691984474659, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.025173503905534744, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.023200811818242073, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.02261660434305668, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.013305491767823696, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.011424027383327484, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.009700397960841656, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.008092949166893959, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.011635221540927887, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.009641947224736214, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.007470958400517702, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.006273801438510418, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.03260814771056175, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.03260814771056175, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.0.self_attn.o_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.10812343657016754, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.06876958161592484, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.04198775812983513, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.04835709184408188, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.047646597027778625, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.024856586009263992, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.07080017775297165, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.06344916671514511, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.05160931870341301, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.031155569478869438, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.03368595242500305, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.036284394562244415, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.030688395723700523, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.02374156191945076, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.021816780790686607, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.01838713139295578, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.013599755242466927, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.012093132361769676, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.010899567976593971, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.009624836035072803, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.010226343758404255, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.010834424756467342, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.008077923208475113, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.008426333777606487, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.031155569478869438, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.031155569478869438, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.0.mlp.gate_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.12046778947114944, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.11244820058345795, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.1098959743976593, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.10052074491977692, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.05442819744348526, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.052169881761074066, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.060635149478912354, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.05591491982340813, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.05489937961101532, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.0494537353515625, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.04770246148109436, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.030830441042780876, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.02682122215628624, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.026208916679024696, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.026059813797473907, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.015524552203714848, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.014113069511950016, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.013980443589389324, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.013217607513070107, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.013134046457707882, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.008719515055418015, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.009406216442584991, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.008527836762368679, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.007280480116605759, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.030830441042780876, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.030830441042780876, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.0.mlp.up_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.13876940310001373, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.13118737936019897, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.1287606805562973, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.1178327351808548, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.06309576332569122, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.06093485653400421, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.06969056278467178, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.06439398229122162, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.0635216161608696, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.05770501866936684, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.05548008531332016, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.03512418642640114, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.03050968423485756, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.029952440410852432, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.02982788160443306, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.017507104203104973, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.015330685302615166, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.015193448401987553, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.014277685433626175, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.014196648262441158, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.009154126979410648, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.009120744653046131, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.008966657333076, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.005959493573755026, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.03512418642640114, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.03512418642640114, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 + }, + { + "accuracy": 0.8982784152030945, + "total_bits": 91697152, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.0.mlp.down_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1789376395089284, - "total_bits": 127947775.99999999, - "err": 0.12024237215518951, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 139482112.0, - "err": 0.101817786693573, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 154162176.0, - "err": 0.09374283999204636, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7235804966517856, - "total_bits": 159929344.0, - "err": 0.08290618658065796, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2235804966517856, - "total_bits": 189289472.0, - "err": 0.05298610404133797, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7146519252232144, - "total_bits": 218125312.0, - "err": 0.045809466391801834, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 178001152.0, - "err": 0.0669657364487648, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 183522304.0, - "err": 0.05979461967945099, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1789376395089284, - "total_bits": 186668032.0, - "err": 0.05535787343978882, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5271519252232144, - "total_bits": 207115264.0, - "err": 0.043554555624723434, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6608973911830356, - "total_bits": 214968832.0, - "err": 0.04185398668050766, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 236721408.0, - "err": 0.03372780233621597, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 242242560.0, - "err": 0.02859792858362198, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.223580496651786, - "total_bits": 248009728.0, - "err": 0.025636693462729454, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.321794782366071, - "total_bits": 253776896.0, - "err": 0.024880794808268547, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.017023608088493347, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.223580496651786, - "total_bits": 306729984.0, - "err": 0.013818589970469475, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.339651925223214, - "total_bits": 313545728.0, - "err": 0.013471472077071667, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.527151925223214, - "total_bits": 324555776.0, - "err": 0.012081114575266838, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.723580496651786, - "total_bits": 336090112.0, - "err": 0.011609114706516266, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 354161920.0, - "err": 0.009426789358258247, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 359683072.0, - "err": 0.00958154909312725, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.227770124162946, - "total_bits": 365696256.0, - "err": 0.008459058590233326, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 477123584.0, - "err": 0.007139632012695074, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 236721408.0, - "err": 0.03372780233621597, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 236721408.0, - "err": 0.03372780233621597, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 + "bits_prop": [ + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.1.self_attn.q_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.023091968148946762, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.015555170364677906, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.008996852673590183, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.009796328842639923, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.009481514804065228, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.0043272762559354305, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.01616731844842434, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.014582658186554909, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.010812761262059212, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.0071410140953958035, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.007659980095922947, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.008166026324033737, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.0069733005948364735, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.004737042356282473, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.004039943218231201, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.0041069146245718, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.002714573172852397, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.0022753404919058084, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.002325240755453706, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.0018964001210406423, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.002234810031950474, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.0022995888721197844, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.0015237913466989994, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.0016896792221814394, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.023091968148946762, - "qparams": { - "group_size": 32, + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, "bits": [ 3, 2 ], "bits_prop": [ - 0.05, - 0.95 + 0.1, + 0.9 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.023091968148946762, - "qparams": { - "group_size": 32, + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, "bits": [ 3, 2 ], "bits_prop": [ - 0.05, - 0.95 + 0.25, + 0.75 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.1.self_attn.k_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.01961326412856579, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.013523860834538937, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.00747302919626236, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.008151691406965256, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.00791976135224104, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.0033667702227830887, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.014469869434833527, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.012849528342485428, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.00908152386546135, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.006218457594513893, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.0067299045622348785, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.007218083366751671, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.006104154512286186, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.003939121495932341, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.0032448626589030027, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.0036178608424961567, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.0022255077492445707, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.0017939668614417315, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.0019307994516566396, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.0014927821466699243, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.0019221948459744453, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.0019122154917567968, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.0011963536962866783, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.0013307651970535517, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.01961326412856579, - "qparams": { - "group_size": 32, + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, "bits": [ 3, 2 ], "bits_prop": [ - 0.05, - 0.95 + 0.1, + 0.9 ], "scale_bits": 4 } }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.01961326412856579, - "qparams": { - "group_size": 32, + { + "accuracy": 0.9125418663024902, + "total_bits": 95234560, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, "bits": [ 3, 2 ], "bits_prop": [ - 0.05, - 0.95 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.1.self_attn.v_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.13694781064987183, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.09121209383010864, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.06642815470695496, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.06449497491121292, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.05879874899983406, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.03525979816913605, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.08299369364976883, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.07551287114620209, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.06493111699819565, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.03951020538806915, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.040463950484991074, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.04226595163345337, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.03596683219075203, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.028500594198703766, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.026472484692931175, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.02113575115799904, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.014949839562177658, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.013128602877259254, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.011264513246715069, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.009668315760791302, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.010955244302749634, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.010634355247020721, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.008307446725666523, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.006832723971456289, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.03525979816913605, - "qparams": { - "group_size": 32, + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, "bits": [ - 6, - 3 + 3, + 2 ], "bits_prop": [ - 0.2, - 0.8 + 0.1, + 0.9 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.03525979816913605, - "qparams": { - "group_size": 32, + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, "bits": [ - 6, + 4, 3 ], "bits_prop": [ - 0.2, - 0.8 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.1.self_attn.o_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.1648101955652237, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.13343273103237152, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.11932387948036194, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.0981418713927269, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.07633618265390396, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.06266967952251434, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.09794116765260696, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.08635838329792023, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.07853825390338898, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.0550752729177475, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.053072646260261536, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.05040960758924484, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.0421520471572876, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.03763328120112419, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.03649726137518883, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.025601070374250412, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.021043146029114723, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.020079229027032852, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.01735270395874977, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.016624372452497482, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.014400430954992771, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.015390431508421898, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.012927105650305748, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.012260950170457363, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.03649726137518883, - "qparams": { - "group_size": 32, + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, "bits": [ - 6, - 4 + 3, + 2 ], "bits_prop": [ 0.1, @@ -5073,24898 +193,1369 @@ "scale_bits": 4 } }, - "best_option": { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.03649726137518883, - "qparams": { - "group_size": 32, + { + "accuracy": 0.9417982697486877, + "total_bits": 111748096, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, "bits": [ - 6, - 4 + 3, + 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.1.mlp.gate_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.16362403333187103, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.15466660261154175, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.1518382728099823, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.13822512328624725, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.07591942697763443, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.07320614904165268, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.08423076570034027, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.07732467353343964, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.07643930613994598, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.06885766983032227, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.06626056879758835, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.04311027377843857, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.03745805099606514, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.03684670478105545, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.03669747710227966, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.021816818043589592, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.02012396603822708, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.01996842958033085, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.018831977620720863, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.018755361437797546, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.012409806251525879, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.013673617504537106, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.012242352589964867, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.010891897603869438, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.03684670478105545, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.03684670478105545, - "qparams": { - "group_size": 32, + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, "bits": [ - 5, - 4 + 3, + 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.1.mlp.up_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.19523309171199799, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.18513913452625275, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.18195423483848572, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.165786013007164, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.09077209234237671, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.08769238740205765, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.10019142180681229, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.09231530129909515, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.09137183427810669, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.08252052962779999, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.07921504974365234, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.05108039081096649, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.044353798031806946, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.04370523989200592, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.04355039447546005, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.02560071088373661, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.02322547324001789, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.023052183911204338, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.021656351163983345, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.0215750839561224, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.013993660919368267, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.015011992305517197, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.013795748353004456, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.011288422159850597, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.02560071088373661, - "qparams": { - "group_size": 128, + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, "bits": [ - 5 + 4, + 3 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.02560071088373661, - "qparams": { - "group_size": 128, + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, "bits": [ - 5 + 4, + 3 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 } - } - }, - { - "key": "model.layers.1.mlp.down_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1789376395089284, - "total_bits": 127947775.99999999, - "err": 0.013239845633506775, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 139482112.0, - "err": 0.011550025083124638, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 154162176.0, - "err": 0.00999768078327179, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7235804966517856, - "total_bits": 159929344.0, - "err": 0.008862907998263836, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2235804966517856, - "total_bits": 189289472.0, - "err": 0.0064110057428479195, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7146519252232144, - "total_bits": 218125312.0, - "err": 0.005023104138672352, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 178001152.0, - "err": 0.008919762447476387, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 183522304.0, - "err": 0.008202103897929192, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1789376395089284, - "total_bits": 186668032.0, - "err": 0.00665127532556653, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5271519252232144, - "total_bits": 207115264.0, - "err": 0.005454970523715019, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6608973911830356, - "total_bits": 214968832.0, - "err": 0.005285201128572226, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 236721408.0, - "err": 0.004652597941458225, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 242242560.0, - "err": 0.004286228213459253, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.223580496651786, - "total_bits": 248009728.0, - "err": 0.003234872594475746, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.321794782366071, - "total_bits": 253776896.0, - "err": 0.002967768581584096, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.0024562422186136246, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.223580496651786, - "total_bits": 306729984.0, - "err": 0.0019430842949077487, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.339651925223214, - "total_bits": 313545728.0, - "err": 0.0017378060147166252, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.527151925223214, - "total_bits": 324555776.0, - "err": 0.001784398453310132, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.723580496651786, - "total_bits": 336090112.0, - "err": 0.001552601926960051, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 354161920.0, - "err": 0.001517941476777196, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 359683072.0, - "err": 0.0016108903801068664, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.227770124162946, - "total_bits": 365696256.0, - "err": 0.0011275814613327384, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 477123584.0, - "err": 0.0012111511314287782, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1789376395089284, - "total_bits": 127947775.99999999, - "err": 0.013239845633506775, - "qparams": { - "group_size": 32, + }, + { + "accuracy": 0.9427623748779297, + "total_bits": 132388864, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, "bits": [ - 3, - 2 + 4, + 3 ], "bits_prop": [ - 0.05, - 0.95 + 0.1, + 0.9 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1789376395089284, - "total_bits": 127947775.99999999, - "err": 0.013239845633506775, - "qparams": { - "group_size": 32, + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.2.self_attn.q_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.0579843670129776, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.04567988961935043, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.03882751986384392, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.03501465916633606, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.025655467063188553, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.019598517566919327, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.03489302098751068, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.0318126454949379, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.02732745185494423, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.019839109852910042, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.01952552981674671, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.017671117559075356, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.015165857970714569, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.012419469654560089, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.01169032882899046, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.008837662637233734, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.006555232685059309, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.006005110684782267, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.005435371771454811, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.004929506219923496, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.004609422758221626, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.004604033660143614, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.003660236019641161, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.0030720029026269913, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.03501465916633606, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.03501465916633606, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 + 4, + 3 ], "bits_prop": [ - 0.1, - 0.4, - 0.5 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.2.self_attn.k_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.05919991806149483, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.04464643821120262, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.03581908717751503, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.033044297248125076, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.02566838636994362, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.01799565739929676, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.03672017902135849, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.033319249749183655, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.027789678424596786, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.019435370340943336, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.019466254860162735, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.018547311425209045, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.015892408788204193, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.012473703362047672, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.011530310846865177, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.009263944812119007, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.006652519106864929, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.005953731015324593, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.00547406543046236, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.00481667835265398, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.0048412443138659, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.004856469575315714, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.0036898618564009666, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.0032688446808606386, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.03581908717751503, - "qparams": { - "group_size": 32, + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, "bits": [ 4, - 2 + 3 ], "bits_prop": [ - 0.25, - 0.75 + 0.1, + 0.9 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.03581908717751503, - "qparams": { - "group_size": 32, + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, "bits": [ 4, - 2 + 3 ], "bits_prop": [ - 0.25, - 0.75 + 0.1, + 0.9 ], "scale_bits": 4 } - } - }, - { - "key": "model.layers.2.self_attn.v_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.16937875747680664, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.14124611020088196, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.12983699142932892, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.11480414122343063, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.07753883302211761, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.06554295122623444, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.09476738423109055, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.08625192195177078, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.08060935139656067, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.06116408109664917, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.05834304168820381, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.04829014837741852, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.04128824174404144, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.03728265315294266, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.0362919345498085, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.024143753573298454, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.01915558986365795, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.018216408789157867, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.015857523307204247, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.015162157826125622, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.012628898955881596, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.012116560712456703, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.01117571722716093, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.007678456604480743, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.03728265315294266, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.03728265315294266, - "qparams": { - "group_size": 32, + }, + { + "accuracy": 0.9453083276748657, + "total_bits": 132455936, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, "bits": [ - 5, - 4 + 4, + 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.2.self_attn.o_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.17107564210891724, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.15104973316192627, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.14135795831680298, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.12027318775653839, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.08039070665836334, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.0714440867304802, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.09849310666322708, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.08917875587940216, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.08224780857563019, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.06540190428495407, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.06052470952272415, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.051283642649650574, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.043603189289569855, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.039586011320352554, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.03860730305314064, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.026096530258655548, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.02187005616724491, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.02114897407591343, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.01910586655139923, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.018505455926060677, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.014817661605775356, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.015612530522048473, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.01355936098843813, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.012287994846701622, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.026096530258655548, - "qparams": { - "group_size": 128, + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, "bits": [ - 5 + 4, + 3 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.026096530258655548, - "qparams": { - "group_size": 128, + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, "bits": [ - 5 + 4, + 3 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.2.mlp.gate_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.2010604590177536, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.18915079534053802, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.18543827533721924, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.16783984005451202, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.09431464225053787, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.09046612679958344, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.10447006672620773, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.09635025262832642, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.09510654211044312, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.08446097373962402, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.08045072853565216, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.053402360528707504, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.04625486209988594, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.0453554131090641, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.04514150321483612, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.026712169870734215, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.023770619183778763, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.023545589298009872, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.021849486976861954, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.021716682240366936, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.014406366273760796, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.014957533217966557, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.014112599194049835, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.010764745064079762, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.026712169870734215, - "qparams": { - "group_size": 128, + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, "bits": [ - 5 + 4, + 3 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.026712169870734215, - "qparams": { - "group_size": 128, + { + "accuracy": 0.95171058177948, + "total_bits": 169089024, + "q_proj": { + "group_size": { + "4": 128 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.2.mlp.up_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.2318328320980072, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.2185201197862625, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.21434690058231354, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.1940186321735382, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.1086912527680397, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.10434680432081223, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.12013097107410431, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.11090852320194244, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.10955997556447983, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.09746558964252472, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.09277377277612686, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.06115536391735077, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.05296230688691139, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.051994189620018005, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.05176451429724693, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.030545711517333984, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.026696236804127693, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.026426326483488083, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.0244183000177145, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.024276765063405037, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.01608256995677948, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.015978317707777023, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.015765026211738586, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.010553430765867233, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.030545711517333984, - "qparams": { - "group_size": 128, + }, + "k_proj": { + "group_size": { + "4": 128 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.030545711517333984, - "qparams": { - "group_size": 128, + }, + "v_proj": { + "group_size": { + "4": 128 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.2.mlp.down_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1789376395089284, - "total_bits": 127947775.99999999, - "err": 0.1914568543434143, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 139482112.0, - "err": 0.16828623414039612, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 154162176.0, - "err": 0.1587812751531601, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7235804966517856, - "total_bits": 159929344.0, - "err": 0.14026571810245514, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2235804966517856, - "total_bits": 189289472.0, - "err": 0.08669472485780716, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7146519252232144, - "total_bits": 218125312.0, - "err": 0.07776404172182083, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 178001152.0, - "err": 0.10385459661483765, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 183522304.0, - "err": 0.09467954933643341, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1789376395089284, - "total_bits": 186668032.0, - "err": 0.08946493268013, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5271519252232144, - "total_bits": 207115264.0, - "err": 0.07301574945449829, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6608973911830356, - "total_bits": 214968832.0, - "err": 0.06920285522937775, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 236721408.0, - "err": 0.052747152745723724, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 242242560.0, - "err": 0.04531319811940193, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.223580496651786, - "total_bits": 248009728.0, - "err": 0.0417654775083065, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.321794782366071, - "total_bits": 253776896.0, - "err": 0.040891245007514954, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.026481756940484047, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.223580496651786, - "total_bits": 306729984.0, - "err": 0.022105682641267776, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.339651925223214, - "total_bits": 313545728.0, - "err": 0.021682292222976685, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.527151925223214, - "total_bits": 324555776.0, - "err": 0.01949465088546276, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.723580496651786, - "total_bits": 336090112.0, - "err": 0.01893620565533638, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 354161920.0, - "err": 0.01435878500342369, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 359683072.0, - "err": 0.014627980999648571, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.227770124162946, - "total_bits": 365696256.0, - "err": 0.01318915281444788, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 477123584.0, - "err": 0.010496833361685276, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.026481756940484047, - "qparams": { - "group_size": 128, + }, + "o_proj": { + "group_size": { + "4": 128 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.026481756940484047, - "qparams": { - "group_size": 128, + { + "accuracy": 0.9549939036369324, + "total_bits": 169221632, + "q_proj": { + "group_size": { + "4": 128 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.3.self_attn.q_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.04441331699490547, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.03722476214170456, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.03314447030425072, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.029378646984696388, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.020174488425254822, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.016668468713760376, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.02650384046137333, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.02414540946483612, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.021043729037046432, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.016228370368480682, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.01570286601781845, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.013466227799654007, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.011540492996573448, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.009762176312506199, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.009302958846092224, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.006746032275259495, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.0051696645095944405, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.004827243275940418, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.004409105982631445, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.004099591169506311, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.0035658301785588264, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.003574295900762081, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.0029561189003288746, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.0024456533137708902, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.03722476214170456, - "qparams": { - "group_size": 32, + }, + "k_proj": { + "group_size": { + "4": 128 + }, "bits": [ - 3, - 2 + 4 ], "bits_prop": [ - 0.25, - 0.75 + 1 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.03722476214170456, - "qparams": { - "group_size": 32, + }, + "v_proj": { + "group_size": { + "4": 64 + }, "bits": [ - 3, - 2 + 4 ], "bits_prop": [ - 0.25, - 0.75 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.3.self_attn.k_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.04410157725214958, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.03597539663314819, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.030760742723941803, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.02740085870027542, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.019671037793159485, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.01540758740156889, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.027188394218683243, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.02474101074039936, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.020746197551488876, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.015613710507750511, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.01536952517926693, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.013793000020086765, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.011801584623754025, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.009498214349150658, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.008878542110323906, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.006899451836943626, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.004974569194018841, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.004533924628049135, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.004180401563644409, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.0037550104316323996, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.003573039313778281, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.003483501262962818, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.002752480562776327, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.0022340649738907814, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.03597539663314819, - "qparams": { - "group_size": 32, + }, + "o_proj": { + "group_size": { + "4": 128 + }, "bits": [ - 3, - 2 + 4 ], "bits_prop": [ - 0.25, - 0.75 + 1 ], "scale_bits": 4 } }, - "best_option": { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.03597539663314819, - "qparams": { - "group_size": 32, + { + "accuracy": 0.9642151594161987, + "total_bits": 170671104, + "q_proj": { + "group_size": { + "4": 64 + }, "bits": [ - 3, - 2 + 4 ], "bits_prop": [ - 0.25, - 0.75 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.3.self_attn.v_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.17903465032577515, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.15225909650325775, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.14137053489685059, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.12425199151039124, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.0821290984749794, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.07120539247989655, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.09893202781677246, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.0909099280834198, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.08511298149824142, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.06597971171140671, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.0626428872346878, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.050281938165426254, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.04339878261089325, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.03942098468542099, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.0384499654173851, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.025107726454734802, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.02016340382397175, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.019283488392829895, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.016861489042639732, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.016182566061615944, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.012905782088637352, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.012539025396108627, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.011490581557154655, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.007765109185129404, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.025107726454734802, - "qparams": { - "group_size": 128, + }, + "k_proj": { + "group_size": { + "4": 64 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.025107726454734802, - "qparams": { - "group_size": 128, + }, + "v_proj": { + "group_size": { + "4": 32 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.3.self_attn.o_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.19150759279727936, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.16421853005886078, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.1534946858882904, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.12762551009655, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.08917918056249619, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.0788804143667221, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.10684332251548767, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.09697554260492325, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.09133327007293701, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.06933339685201645, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.06362047046422958, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.05519940331578255, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.047143466770648956, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.0436529703438282, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.042801711708307266, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.028002306818962097, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.02394549921154976, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.023230649530887604, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.020350256934762, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.019801504909992218, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.015795700252056122, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.01679140515625477, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.014698842540383339, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.013148817233741283, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.028002306818962097, - "qparams": { - "group_size": 128, + }, + "o_proj": { + "group_size": { + "4": 64 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.028002306818962097, - "qparams": { - "group_size": 128, + { + "accuracy": 0.9659315347671509, + "total_bits": 173039616, + "q_proj": { + "group_size": { + "4": 32 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.3.mlp.gate_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.20876121520996094, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.19662871956825256, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.19284510612487793, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.17487677931785583, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.09827428311109543, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.09428051859140396, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.10872958600521088, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.10029500722885132, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.09905748069286346, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.0881127342581749, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.08407623320817947, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.05547661334276199, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.048047494143247604, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.04715295881032944, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.04694818705320358, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.02775155007839203, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.024507369846105576, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.024260632693767548, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.022484945133328438, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.022354010492563248, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.0148264579474926, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.015106499195098877, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.014542900957167149, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.010523957200348377, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.02775155007839203, - "qparams": { - "group_size": 128, + }, + "k_proj": { + "group_size": { + "4": 32 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.02775155007839203, - "qparams": { - "group_size": 128, + }, + "v_proj": { + "group_size": { + "4": 32 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.3.mlp.up_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.24289251863956451, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.22893095016479492, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.22474847733974457, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.20400454103946686, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.11445983499288559, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.10989584773778915, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.1266368329524994, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.11669496446847916, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.11537103354930878, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.10277815163135529, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.09818383306264877, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.06463780999183655, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.05583721771836281, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.05484173446893692, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.05461311340332031, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.032322581857442856, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.028245745226740837, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.027971412986516953, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.025888592004776, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.02573966793715954, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.01720542274415493, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.017036717385053635, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.016886234283447266, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.011442095041275024, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.032322581857442856, - "qparams": { - "group_size": 128, + }, + "o_proj": { + "group_size": { + "4": 32 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.032322581857442856, - "qparams": { - "group_size": 128, + { + "accuracy": 0.9692782163619995, + "total_bits": 174398976, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, "bits": [ - 5 + 5, + 4 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.3.mlp.down_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1789376395089284, - "total_bits": 127947775.99999999, - "err": 0.21671931445598602, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 139482112.0, - "err": 0.1907714307308197, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 154162176.0, - "err": 0.18069855868816376, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7235804966517856, - "total_bits": 159929344.0, - "err": 0.16017822921276093, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2235804966517856, - "total_bits": 189289472.0, - "err": 0.09861071407794952, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7146519252232144, - "total_bits": 218125312.0, - "err": 0.08870038390159607, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 178001152.0, - "err": 0.11705345660448074, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 183522304.0, - "err": 0.10671309381723404, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1789376395089284, - "total_bits": 186668032.0, - "err": 0.10168298333883286, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5271519252232144, - "total_bits": 207115264.0, - "err": 0.08310820162296295, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6608973911830356, - "total_bits": 214968832.0, - "err": 0.07883554697036743, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 236721408.0, - "err": 0.059506021440029144, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 242242560.0, - "err": 0.05108710750937462, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.223580496651786, - "total_bits": 248009728.0, - "err": 0.04747043550014496, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.321794782366071, - "total_bits": 253776896.0, - "err": 0.04658551886677742, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.02992568165063858, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.223580496651786, - "total_bits": 306729984.0, - "err": 0.02499263733625412, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.339651925223214, - "total_bits": 313545728.0, - "err": 0.02456608973443508, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.527151925223214, - "total_bits": 324555776.0, - "err": 0.022002406418323517, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.723580496651786, - "total_bits": 336090112.0, - "err": 0.021428678184747696, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 354161920.0, - "err": 0.0162461269646883, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 359683072.0, - "err": 0.016307460144162178, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.227770124162946, - "total_bits": 365696256.0, - "err": 0.015058993361890316, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 477123584.0, - "err": 0.011561932042241096, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.02992568165063858, - "qparams": { - "group_size": 128, + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, "bits": [ - 5 + 5, + 4 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.02992568165063858, - "qparams": { - "group_size": 128, + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.4.self_attn.q_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.059078000485897064, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.05004117265343666, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.04480322450399399, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.03996674343943596, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.026898130774497986, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.022452887147665024, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.035515401512384415, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.032091811299324036, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.028023304417729378, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.021952776238322258, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.021295253187417984, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.018091952428221703, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.015376812778413296, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.013059147633612156, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.012460623867809772, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.009085734374821186, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.006956097204238176, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.006518014706671238, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.006009766831994057, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.005614801775664091, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.004841798450797796, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.004855033941566944, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.004029791336506605, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.003394658211618662, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.035515401512384415, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.035515401512384415, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.4.self_attn.k_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.055911850184202194, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.04610319808125496, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.04024018347263336, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.03602834790945053, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.024951409548521042, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.020081091672182083, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.034225985407829285, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.030771862715482712, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.026367170736193657, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.020142270252108574, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.019767682999372482, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.017400942742824554, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.014721816405653954, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.01210574246942997, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.011410274542868137, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.008756880648434162, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.006419604178518057, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.00593824777752161, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.00549009395763278, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.005030198022723198, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.004592718090862036, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.004522504284977913, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.0036204932257533073, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.0030665784142911434, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.03602834790945053, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.03602834790945053, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 + 5, + 4 ], "bits_prop": [ - 0.1, - 0.4, - 0.5 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.4.self_attn.v_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.18882763385772705, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.1628705859184265, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.15257778763771057, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.1347401738166809, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.08680906891822815, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.07647702097892761, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.10330536961555481, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.09505237638950348, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.08971164375543594, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.07083119451999664, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.06716762483119965, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.0525355190038681, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.04537241533398628, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.04170074313879013, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.04079674556851387, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.026237107813358307, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.021390128880739212, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.02057371661067009, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.0181291326880455, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.01751137338578701, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.013581505976617336, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.013277778401970863, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.012269578874111176, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.008351797237992287, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.026237107813358307, - "qparams": { - "group_size": 128, + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, "bits": [ - 5 + 5, + 4 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.026237107813358307, - "qparams": { - "group_size": 128, + { + "accuracy": 0.9757019877433777, + "total_bits": 175225856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, "bits": [ - 5 + 5, + 4 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.4.self_attn.o_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.17584343254566193, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.15452685952186584, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.1464119851589203, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.12264680117368698, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.08199401944875717, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.07368416339159012, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.09805520623922348, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.08806222677230835, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.08387522399425507, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.06657476723194122, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.05997442454099655, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.05048391968011856, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.04290233552455902, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.04018770903348923, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.03952173516154289, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.025549106299877167, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.022035541012883186, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.021448476240038872, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.01921994425356388, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.018807532265782356, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.014291111379861832, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.01537342183291912, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.013433769345283508, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.01208406500518322, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.025549106299877167, - "qparams": { - "group_size": 128, + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, "bits": [ - 5 + 5, + 4 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.025549106299877167, - "qparams": { - "group_size": 128, + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, "bits": [ - 5 + 5, + 4 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.4.mlp.gate_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.18906532227993011, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.1772509664297104, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.17326867580413818, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.15691599249839783, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.08916866779327393, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.08503223955631256, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.09962095320224762, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.0916716530919075, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.09005444496870041, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.07948250323534012, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.07587327063083649, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.05087447166442871, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.04396181181073189, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.04282935708761215, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.042564135044813156, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.025504454970359802, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.022249070927500725, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.02197514846920967, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.02031583897769451, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.020149443298578262, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.013692927546799183, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.013738271780312061, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.013320516794919968, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.009505732916295528, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.025504454970359802, - "qparams": { - "group_size": 128, + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, "bits": [ - 5 + 5, + 4 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.025504454970359802, - "qparams": { - "group_size": 128, + { + "accuracy": 0.9711165428161621, + "total_bits": 178728960, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, "bits": [ - 5 + 5, + 4 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.4.mlp.up_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.24434302747249603, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.22957733273506165, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.22483541071414948, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.203884094953537, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.11541179567575455, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.1103787049651146, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.12836436927318573, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.11823783814907074, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.11648935079574585, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.10315832495689392, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.09848669916391373, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.06543666869401932, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.05658891052007675, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.05533723905682564, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.05504424870014191, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03279973194003105, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.028483938425779343, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.028162196278572083, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.025991998612880707, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.025806864723563194, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.017441028729081154, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.017164312303066254, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.017030639573931694, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.01140944566577673, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03279973194003105, - "qparams": { - "group_size": 128, + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, "bits": [ - 5 + 5, + 4 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03279973194003105, - "qparams": { - "group_size": 128, + }, + "v_proj": { + "group_size": { + "5": 64 + }, "bits": [ 5 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.4.mlp.down_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1789376395089284, - "total_bits": 127947775.99999999, - "err": 0.21374404430389404, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 139482112.0, - "err": 0.18953511118888855, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 154162176.0, - "err": 0.1799410730600357, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7235804966517856, - "total_bits": 159929344.0, - "err": 0.15954023599624634, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2235804966517856, - "total_bits": 189289472.0, - "err": 0.0975705161690712, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7146519252232144, - "total_bits": 218125312.0, - "err": 0.08830487728118896, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 178001152.0, - "err": 0.1161806508898735, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 183522304.0, - "err": 0.10553935170173645, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1789376395089284, - "total_bits": 186668032.0, - "err": 0.10034127533435822, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5271519252232144, - "total_bits": 207115264.0, - "err": 0.08285727351903915, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6608973911830356, - "total_bits": 214968832.0, - "err": 0.07859278470277786, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 236721408.0, - "err": 0.059176649898290634, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 242242560.0, - "err": 0.05064021050930023, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.223580496651786, - "total_bits": 248009728.0, - "err": 0.04706351459026337, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.321794782366071, - "total_bits": 253776896.0, - "err": 0.04617983102798462, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.029825830832123756, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.223580496651786, - "total_bits": 306729984.0, - "err": 0.02491615153849125, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.339651925223214, - "total_bits": 313545728.0, - "err": 0.02448434941470623, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.527151925223214, - "total_bits": 324555776.0, - "err": 0.022090857848525047, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.723580496651786, - "total_bits": 336090112.0, - "err": 0.021540900692343712, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 354161920.0, - "err": 0.016268407925963402, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 359683072.0, - "err": 0.016428213566541672, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.227770124162946, - "total_bits": 365696256.0, - "err": 0.015074191614985466, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 477123584.0, - "err": 0.01185592170804739, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.029825830832123756, - "qparams": { - "group_size": 128, + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, "bits": [ - 5 + 5, + 4 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.029825830832123756, - "qparams": { - "group_size": 128, + { + "accuracy": 0.9777677059173584, + "total_bits": 181067776, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, "bits": [ - 5 + 5, + 4 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.5.self_attn.q_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.07154874503612518, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.06236449256539345, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.05729538947343826, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.05103188380599022, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.0328090600669384, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.028505533933639526, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.04170521721243858, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.03795890882611275, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.0339006669819355, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.02743472531437874, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.026364168152213097, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.021150384098291397, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.01816374808549881, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.015867995098233223, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.015284487046301365, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.01060265488922596, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.00836592260748148, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.007944808341562748, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.007317769806832075, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.006939725484699011, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.005610230378806591, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.005651548970490694, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.004811740014702082, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.003892355365678668, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.0339006669819355, - "qparams": { - "group_size": 32, + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, "bits": [ - 4, - 3 + 5, + 4 ], "bits_prop": [ - 0.05, - 0.95 + 0.1, + 0.9 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.0339006669819355, - "qparams": { - "group_size": 32, + }, + "v_proj": { + "group_size": { + "5": 32 + }, "bits": [ - 4, - 3 + 5 ], "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.5.self_attn.k_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.06404893100261688, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.05487550050020218, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.04936563968658447, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.043941278010606766, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.029045412316918373, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.0244939923286438, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.037955936044454575, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.03470217436552048, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.030202291905879974, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.023958444595336914, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.023200305178761482, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.019254297018051147, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.016511978581547737, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.013991362415254116, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.013329577632248402, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.009619074873626232, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.007261351216584444, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.006787851918488741, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.006255988031625748, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.00581149710342288, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.00497006718069315, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.004866351373493671, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.004079107195138931, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.0031034464482218027, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.03470217436552048, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.03470217436552048, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.5.self_attn.v_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.20590339601039886, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.18295198678970337, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.1744239330291748, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.1546642929315567, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.0956936925649643, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.086921826004982, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.11087003350257874, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.10201141238212585, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.0979299396276474, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.08025088161230087, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.07583951205015182, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.05623720586299896, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.04869839921593666, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.04579585790634155, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.04512307792901993, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.028041159734129906, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.02325177751481533, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.02256939932703972, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.020050253719091415, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.019574452191591263, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.014383772388100624, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.01388638187199831, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.013349611312150955, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.008381268940865993, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.028041159734129906, - "qparams": { - "group_size": 128, + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, "bits": [ - 5 + 5, + 4 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.028041159734129906, - "qparams": { - "group_size": 128, + { + "accuracy": 0.9830902814865112, + "total_bits": 219944960, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, "bits": [ + 6, 5 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.5.self_attn.o_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.20300191640853882, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.18298274278640747, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.17548704147338867, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.15319222211837769, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.0951252281665802, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.08752109110355377, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.10983578860759735, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.10032322257757187, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.09664864838123322, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.07944092899560928, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.07520939409732819, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.0565943717956543, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.04840995743870735, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.045978061854839325, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.045405883342027664, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.028535407036542892, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.024304941296577454, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.023760756477713585, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.021261489018797874, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.0208789873868227, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.015584949404001236, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.015799064189195633, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.014761622995138168, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.011483308859169483, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.028535407036542892, - "qparams": { - "group_size": 128, + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, "bits": [ + 6, 5 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.028535407036542892, - "qparams": { - "group_size": 128, + }, + "v_proj": { + "group_size": { + "6": 128 + }, "bits": [ - 5 + 6 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.5.mlp.gate_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.18142031133174896, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.17018340528011322, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.16636422276496887, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.15072211623191833, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.08554026484489441, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.08158881217241287, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.09548548609018326, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.08802768588066101, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.0863712951540947, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.07631325721740723, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.07279840856790543, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.04864881932735443, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.04214085638523102, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.041024260222911835, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.040760789066553116, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.024343516677618027, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.02115364745259285, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.020887063816189766, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.019287806004285812, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.01912079192698002, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.012882828712463379, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.012839056551456451, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.012515222653746605, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.008601214736700058, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.024343516677618027, - "qparams": { - "group_size": 128, + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, "bits": [ + 6, 5 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.024343516677618027, - "qparams": { - "group_size": 128, + { + "accuracy": 0.9874641299247742, + "total_bits": 223010816, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, "bits": [ + 6, 5 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.5.mlp.up_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.24144043028354645, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.22716215252876282, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.222556933760643, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.2019483894109726, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.1141461730003357, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.10919290035963058, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.126790851354599, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.11692626029253006, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.11514927446842194, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.10211502760648727, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.09746776521205902, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.06461646407842636, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.055940769612789154, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.05466678738594055, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.05437171086668968, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03228280693292618, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.028036480769515038, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.02770949713885784, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.025588026270270348, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.02539679780602455, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.016958527266979218, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.01675421930849552, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.016530727967619896, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.010946005582809448, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03228280693292618, - "qparams": { - "group_size": 128, + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, "bits": [ + 6, 5 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03228280693292618, - "qparams": { - "group_size": 128, + }, + "v_proj": { + "group_size": { + "6": 32 + }, "bits": [ - 5 + 6 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.5.mlp.down_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1789376395089284, - "total_bits": 127947775.99999999, - "err": 0.20920421183109283, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 139482112.0, - "err": 0.18713390827178955, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 154162176.0, - "err": 0.1777515411376953, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7235804966517856, - "total_bits": 159929344.0, - "err": 0.15813444554805756, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2235804966517856, - "total_bits": 189289472.0, - "err": 0.09592191874980927, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7146519252232144, - "total_bits": 218125312.0, - "err": 0.08701980859041214, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 178001152.0, - "err": 0.11459960043430328, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 183522304.0, - "err": 0.10415370017290115, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1789376395089284, - "total_bits": 186668032.0, - "err": 0.09854362159967422, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5271519252232144, - "total_bits": 207115264.0, - "err": 0.08205828815698624, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6608973911830356, - "total_bits": 214968832.0, - "err": 0.07803484797477722, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 236721408.0, - "err": 0.05844460427761078, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 242242560.0, - "err": 0.049959562718868256, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.223580496651786, - "total_bits": 248009728.0, - "err": 0.04621494561433792, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.321794782366071, - "total_bits": 253776896.0, - "err": 0.04530154913663864, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.029452964663505554, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.223580496651786, - "total_bits": 306729984.0, - "err": 0.0243463683873415, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.339651925223214, - "total_bits": 313545728.0, - "err": 0.023883137851953506, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.527151925223214, - "total_bits": 324555776.0, - "err": 0.021655205637216568, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.723580496651786, - "total_bits": 336090112.0, - "err": 0.021061701700091362, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 354161920.0, - "err": 0.016022970899939537, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 359683072.0, - "err": 0.01592990942299366, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.227770124162946, - "total_bits": 365696256.0, - "err": 0.01473891083151102, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 477123584.0, - "err": 0.0112729137763381, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.029452964663505554, - "qparams": { - "group_size": 128, + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, "bits": [ + 6, 5 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.029452964663505554, - "qparams": { - "group_size": 128, + { + "accuracy": 0.9845578670501709, + "total_bits": 252975104, + "q_proj": { + "group_size": { + "6": 128 + }, "bits": [ - 5 + 6 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.6.self_attn.q_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.06566662341356277, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.05688517540693283, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.051203567534685135, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.0456426739692688, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.030025893822312355, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.02542761340737343, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.039843179285526276, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.03610432147979736, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.03112751431763172, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.024996871128678322, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.024270297959446907, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.020237183198332787, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.017254263162612915, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.014546366408467293, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.013838691636919975, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.010137645527720451, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.007714844774454832, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.007224150933325291, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.006745200604200363, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.006287748459726572, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.005335751920938492, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.005363237578421831, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.0043676793575286865, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.0036858266685158014, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.03610432147979736, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.03610432147979736, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.6.self_attn.k_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.059502266347408295, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.0506003238260746, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.044614966958761215, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.039764199405908585, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.02689073048532009, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.022079331800341606, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.0365220308303833, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.033210307359695435, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.02803664654493332, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.022142065688967705, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.02161545120179653, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.01851375214755535, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.015872636809945107, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.012988361530005932, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.012224407866597176, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.009249537251889706, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.006810973398387432, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.006280745379626751, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.005884887650609016, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.005372484214603901, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.004815048072487116, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.004737174604088068, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.0038040082436054945, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.0030845007859170437, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.0365220308303833, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.0365220308303833, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 + "bits_prop": [ + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.6.self_attn.v_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.18248280882835388, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.16214944422245026, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.15350690484046936, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.13628675043582916, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.08470384031534195, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.07645551860332489, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.10034516453742981, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.09221114218235016, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.08680647611618042, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.07125064730644226, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.06746239960193634, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.050882305949926376, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.043983712792396545, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.040654413402080536, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.039810143411159515, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.02541799657046795, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.020730184391140938, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.020025063306093216, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.017950385808944702, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.017402341589331627, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.013044622726738453, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.012674720026552677, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.011833421885967255, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.007781792897731066, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.02541799657046795, - "qparams": { - "group_size": 128, + }, + "v_proj": { + "group_size": { + "6": 128 + }, "bits": [ - 5 + 6 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.02541799657046795, - "qparams": { - "group_size": 128, + }, + "o_proj": { + "group_size": { + "6": 128 + }, "bits": [ - 5 + 6 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 } - } - }, - { - "key": "model.layers.6.self_attn.o_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.19038455188274384, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.16537146270275116, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.15479375422000885, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.13363616168498993, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.08839015662670135, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.07810603827238083, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.10746713727712631, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.09726442396640778, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.09110194444656372, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.07162598520517349, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.06713978201150894, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.05536162108182907, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.04705414921045303, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.043027572333812714, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.04205004498362541, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.027832722291350365, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.023085637018084526, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.022269215434789658, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.01990273967385292, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.019260844215750694, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.015066844411194324, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.01571604236960411, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.013714651577174664, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.011653943918645382, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.027832722291350365, - "qparams": { - "group_size": 128, + }, + { + "accuracy": 0.9907486438751221, + "total_bits": 265314304, + "q_proj": { + "group_size": { + "6": 32 + }, "bits": [ - 5 + 6 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.027832722291350365, - "qparams": { - "group_size": 128, + }, + "k_proj": { + "group_size": { + "6": 32 + }, "bits": [ - 5 + 6 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.6.mlp.gate_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.17867936193943024, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.16785375773906708, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.16409721970558167, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.1491023600101471, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.08426667004823685, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.08043190836906433, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.09422215074300766, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.08680236339569092, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.08506544679403305, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.07542549073696136, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.07211484014987946, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.04805385693907738, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.041564445942640305, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.04041466489434242, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.04014696925878525, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.02405734546482563, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.0208986084908247, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.020632300525903702, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.019123462960124016, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.018951088190078735, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.012782863341271877, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.012780888937413692, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.012398689053952694, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.008655223995447159, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.02405734546482563, - "qparams": { - "group_size": 128, + }, + "v_proj": { + "group_size": { + "8": 32 + }, "bits": [ - 5 + 8 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.02405734546482563, - "qparams": { - "group_size": 128, + }, + "o_proj": { + "group_size": { + "6": 32 + }, "bits": [ - 5 + 6 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 } - } - }, - { - "key": "model.layers.6.mlp.up_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.2350843846797943, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.22149236500263214, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.2168940156698227, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.19712883234024048, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.11107370257377625, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.10631314665079117, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.12372580170631409, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.11396772414445877, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.11204565316438675, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.09963250160217285, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.09531085193157196, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.06299826502799988, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.05451206862926483, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.05320107564330101, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.05289101228117943, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03150005266070366, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.02727987989783287, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.026954753324389458, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.024952856823801994, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.024754993617534637, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.016569755971431732, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.01631811074912548, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.016133219003677368, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.01066030003130436, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03150005266070366, - "qparams": { - "group_size": 128, + }, + { + "accuracy": 0.9937370419502258, + "total_bits": 336861184, + "q_proj": { + "group_size": { + "8": 128 + }, "bits": [ - 5 + 8 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03150005266070366, - "qparams": { - "group_size": 128, + }, + "k_proj": { + "group_size": { + "8": 128 + }, "bits": [ - 5 + 8 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.6.mlp.down_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1789376395089284, - "total_bits": 127947775.99999999, - "err": 0.2174004316329956, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 139482112.0, - "err": 0.194844588637352, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 154162176.0, - "err": 0.18577741086483002, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7235804966517856, - "total_bits": 159929344.0, - "err": 0.16483478248119354, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2235804966517856, - "total_bits": 189289472.0, - "err": 0.09999208897352219, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7146519252232144, - "total_bits": 218125312.0, - "err": 0.09123877435922623, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 178001152.0, - "err": 0.11878763884305954, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 183522304.0, - "err": 0.10761105269193649, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1789376395089284, - "total_bits": 186668032.0, - "err": 0.1026422455906868, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5271519252232144, - "total_bits": 207115264.0, - "err": 0.08550740033388138, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6608973911830356, - "total_bits": 214968832.0, - "err": 0.08134613186120987, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 236721408.0, - "err": 0.06056113913655281, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 242242560.0, - "err": 0.051671721041202545, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.223580496651786, - "total_bits": 248009728.0, - "err": 0.04823139309883118, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.321794782366071, - "total_bits": 253776896.0, - "err": 0.04739754647016525, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.030440999194979668, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.223580496651786, - "total_bits": 306729984.0, - "err": 0.02553630992770195, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.339651925223214, - "total_bits": 313545728.0, - "err": 0.02512340061366558, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.527151925223214, - "total_bits": 324555776.0, - "err": 0.02274872176349163, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.723580496651786, - "total_bits": 336090112.0, - "err": 0.022217698395252228, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 354161920.0, - "err": 0.01654282584786415, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 359683072.0, - "err": 0.016788369044661522, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.227770124162946, - "total_bits": 365696256.0, - "err": 0.015405109152197838, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 477123584.0, - "err": 0.012128129601478577, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.030440999194979668, - "qparams": { - "group_size": 128, + }, + "v_proj": { + "group_size": { + "8": 128 + }, "bits": [ - 5 + 8 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.030440999194979668, - "qparams": { - "group_size": 128, + }, + "o_proj": { + "group_size": { + "8": 128 + }, "bits": [ - 5 + 8 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 } } - }, - { - "key": "model.layers.7.self_attn.q_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.0720716193318367, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.06426133215427399, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.05809533968567848, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.05172733590006828, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.03325524926185608, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.02860512211918831, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.04481473192572594, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.04037132114171982, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.03413477912545204, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.028397679328918457, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.02753693051636219, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.022816816344857216, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.01936725527048111, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.016144806519150734, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.015293260104954243, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.011429889127612114, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.008625037968158722, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.008066043257713318, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.007699178531765938, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.007154548075050116, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.006036166101694107, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.006101489067077637, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.004865370690822601, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.004251792561262846, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.03413477912545204, - "qparams": { - "group_size": 32, + ], + "model.layers.0.mlp": [ + { + "accuracy": 0.8896347880363464, + "total_bits": 394740800, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, "bits": [ - 4, - 3 + 3, + 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.03413477912545204, - "qparams": { - "group_size": 32, + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, "bits": [ - 4, - 3 + 3, + 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.7.self_attn.k_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.06133595481514931, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.05404529720544815, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.047633104026317596, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.04227820038795471, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.0280048456043005, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.02331128902733326, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.03880739212036133, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.03544660657644272, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.028864599764347076, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.023758094757795334, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.023194393143057823, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.019674617797136307, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.016944101080298424, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.013551491312682629, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.012629234232008457, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.009851435199379921, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.007139271125197411, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.006530906073749065, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.006317491177469492, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.005703183356672525, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.005108444020152092, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.0050734165124595165, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.003932689782232046, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.003300883574411273, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.03544660657644272, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.03544660657644272, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.7.self_attn.v_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.19221225380897522, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.17337742447853088, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.16593240201473236, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.14720314741134644, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.089864082634449, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.08238513767719269, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.10490945726633072, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.09581943601369858, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.09150072932243347, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.07632315158843994, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.07220336049795151, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.05336983874440193, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.04577777534723282, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.043018639087677, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.04235871881246567, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.02662641555070877, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.021944399923086166, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.021340178325772285, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.019162729382514954, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.01871659979224205, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.013712058775126934, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.013247372582554817, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.012628431431949139, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.008204196579754353, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.02662641555070877, - "qparams": { - "group_size": 128, + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, "bits": [ - 5 + 6, + 3, + 2 ], "bits_prop": [ - 1.0 + 0.05, + 0.2, + 0.75 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.02662641555070877, - "qparams": { - "group_size": 128, + { + "accuracy": 0.8938291668891907, + "total_bits": 408503360, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, "bits": [ - 5 + 3, + 2 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.7.self_attn.o_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.19961880147457123, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.1786590963602066, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.16882538795471191, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.14568151533603668, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.09386871755123138, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.08472268283367157, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.11412461847066879, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.1024649515748024, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.0958065316081047, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.07773580402135849, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.0737183690071106, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.05895848944783211, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.04993608593940735, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.04600513353943825, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.045048851519823074, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.029755961149930954, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.02513718418776989, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.02435997501015663, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.02214485965669155, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.02154926396906376, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.016484782099723816, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.017553195357322693, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.015229190699756145, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.013614676892757416, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.029755961149930954, - "qparams": { - "group_size": 128, + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, "bits": [ - 5 + 3, + 2 ], "bits_prop": [ - 1.0 - ], + 0.25, + 0.75 + ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.029755961149930954, - "qparams": { - "group_size": 128, + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, "bits": [ - 5 + 6, + 3, + 2 ], "bits_prop": [ - 1.0 + 0.05, + 0.2, + 0.75 ], "scale_bits": 4 } - } - }, - { - "key": "model.layers.7.mlp.gate_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.1761300414800644, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.16579565405845642, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.16229669749736786, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.14760924875736237, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.08321591466665268, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.07959499955177307, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.09265100210905075, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.08544014394283295, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.08398609608411789, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.07457789033651352, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.07137362658977509, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.0473363883793354, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.04097425192594528, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.03997275233268738, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.03973591700196266, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.023693762719631195, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.02074485644698143, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.020504480227828026, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.0190152358263731, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.01886807195842266, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.01261780597269535, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.012776575982570648, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.012287318706512451, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.008817660622298717, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.023693762719631195, - "qparams": { - "group_size": 128, + }, + { + "accuracy": 0.9174986481666565, + "total_bits": 456551424, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, "bits": [ - 5 + 3, + 2 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.023693762719631195, - "qparams": { - "group_size": 128, + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, "bits": [ - 5 + 3, + 2 ], "bits_prop": [ - 1.0 + 0.3, + 0.7 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.7.mlp.up_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.23753029108047485, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.224023699760437, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.21960440278053284, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.19987249374389648, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.11243797838687897, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.10776839405298233, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.12511268258094788, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.11508514732122421, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.11343854665756226, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.10094301402568817, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.09663901478052139, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.06393475830554962, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.05514717847108841, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.053966645151376724, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.053687963634729385, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03200753033161163, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.02786625362932682, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.027566539123654366, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.025545340031385422, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.025373468175530434, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.017094222828745842, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.016930140554904938, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.016695329919457436, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.011456333100795746, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03200753033161163, - "qparams": { - "group_size": 128, + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, "bits": [ - 5 + 5, + 3 ], "bits_prop": [ - 1.0 + 0.05, + 0.95 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03200753033161163, - "qparams": { - "group_size": 128, + { + "accuracy": 0.926499605178833, + "total_bits": 512257024, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, "bits": [ - 5 + 3, + 2 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.7.mlp.down_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1789376395089284, - "total_bits": 127947775.99999999, - "err": 0.2115316092967987, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 139482112.0, - "err": 0.18832725286483765, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 154162176.0, - "err": 0.17890965938568115, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7235804966517856, - "total_bits": 159929344.0, - "err": 0.15967246890068054, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2235804966517856, - "total_bits": 189289472.0, - "err": 0.09699700027704239, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7146519252232144, - "total_bits": 218125312.0, - "err": 0.08783502131700516, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 178001152.0, - "err": 0.11607325077056885, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 183522304.0, - "err": 0.10522805899381638, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1789376395089284, - "total_bits": 186668032.0, - "err": 0.09981242567300797, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5271519252232144, - "total_bits": 207115264.0, - "err": 0.08283832669258118, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6608973911830356, - "total_bits": 214968832.0, - "err": 0.07917797565460205, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 236721408.0, - "err": 0.059263456612825394, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 242242560.0, - "err": 0.050480857491493225, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.223580496651786, - "total_bits": 248009728.0, - "err": 0.04677405208349228, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.321794782366071, - "total_bits": 253776896.0, - "err": 0.045866020023822784, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.029808951541781425, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.223580496651786, - "total_bits": 306729984.0, - "err": 0.02470569871366024, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.339651925223214, - "total_bits": 313545728.0, - "err": 0.024273283779621124, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.527151925223214, - "total_bits": 324555776.0, - "err": 0.02198246866464615, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.723580496651786, - "total_bits": 336090112.0, - "err": 0.021408097818493843, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 354161920.0, - "err": 0.01620144210755825, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 359683072.0, - "err": 0.016248416155576706, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.227770124162946, - "total_bits": 365696256.0, - "err": 0.014965670183300972, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 477123584.0, - "err": 0.01163612212985754, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.029808951541781425, - "qparams": { - "group_size": 128, + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, "bits": [ - 5 + 3, + 2 ], "bits_prop": [ - 1.0 + 0.3, + 0.7 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.029808951541781425, - "qparams": { - "group_size": 128, + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, "bits": [ - 5 + 5, + 4 ], "bits_prop": [ - 1.0 + 0.05, + 0.95 ], "scale_bits": 4 } - } - }, - { - "key": "model.layers.8.self_attn.q_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.07211717963218689, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.06383217126131058, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.05904073268175125, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.052151717245578766, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.03322591632604599, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.029229089617729187, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.041768528521060944, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.03819621354341507, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.034108638763427734, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.027888255193829536, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.026653669774532318, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.02118411473929882, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.018233373761177063, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.016029734164476395, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.015468045137822628, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.010602776892483234, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.008400519378483295, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.007991892285645008, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.007349912077188492, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.006984522566199303, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.005574035458266735, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.005596345290541649, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.004805734381079674, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.003786521265283227, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.034108638763427734, - "qparams": { - "group_size": 32, + }, + { + "accuracy": 0.948441743850708, + "total_bits": 577676384, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, "bits": [ 4, 3 ], "bits_prop": [ - 0.05, - 0.95 + 0.1, + 0.9 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.034108638763427734, - "qparams": { - "group_size": 32, + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, "bits": [ 4, 3 ], "bits_prop": [ - 0.05, - 0.95 + 0.25, + 0.75 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.8.self_attn.k_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.062067579478025436, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.053822990506887436, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.048970580101013184, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.04307115077972412, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.028213901445269585, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.024139966815710068, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.03641221672296524, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.033174481242895126, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.029176564887166023, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.023318316787481308, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.022361181676387787, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.01849563606083393, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.015850966796278954, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.013583006337285042, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.01300427783280611, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.009246858768165112, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.0070726703852415085, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.0066490499302744865, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.0061022634617984295, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.005712481681257486, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.004813473206013441, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.0047250427305698395, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.004015539772808552, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.003073861123993993, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.03641221672296524, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.03641221672296524, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 ], "scale_bits": 4 } - } - }, - { - "key": "model.layers.8.self_attn.v_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.18069499731063843, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.16105309128761292, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.15322035551071167, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.13506904244422913, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.08373605459928513, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.07608826458454132, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.09886595606803894, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.09004313498735428, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.08570363372564316, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.07025372982025146, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.06637690216302872, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.05030699074268341, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.04304011911153793, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.04019834101200104, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.03950796648859978, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.025133324787020683, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.020652707666158676, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.02002655901014805, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.017908520996570587, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.017432142049074173, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.01299868244677782, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.012772399000823498, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.011859405785799026, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.008220058865845203, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.025133324787020683, - "qparams": { - "group_size": 128, + }, + { + "accuracy": 0.9525308012962341, + "total_bits": 592151552, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, "bits": [ - 5 + 4, + 3 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.025133324787020683, - "qparams": { - "group_size": 128, + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, "bits": [ - 5 + 4, + 3 ], "bits_prop": [ - 1.0 + 0.25, + 0.75 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.8.self_attn.o_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.20352551341056824, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.18269158899784088, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.17414529621601105, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.15150482952594757, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.09545019268989563, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.08685242384672165, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.1126457080245018, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.10234853625297546, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.09737683087587357, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.07994170486927032, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.07482805103063583, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.05789105221629143, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.04927476868033409, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.04616647958755493, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.04542269930243492, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.02896823361515999, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.024229537695646286, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.02355833910405636, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.021165700629353523, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.02067689597606659, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.015337927266955376, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.01566116325557232, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.014328384771943092, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.011057710275053978, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.02896823361515999, - "qparams": { - "group_size": 128, + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, "bits": [ - 5 + 8, + 4, + 3 ], "bits_prop": [ - 1.0 + 0.05, + 0.1, + 0.85 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.02896823361515999, - "qparams": { - "group_size": 128, + { + "accuracy": 0.9637812376022339, + "total_bits": 636733536, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, "bits": [ - 5 + 4, + 3 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.8.mlp.gate_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.17315414547920227, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.16317257285118103, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.15972568094730377, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.1452513337135315, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.08177046477794647, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.07822863757610321, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.09113486111164093, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.08403681218624115, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.08251754939556122, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.07335157692432404, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.07014395296573639, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.046437621116638184, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.04023659974336624, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.039228081703186035, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.038994017988443375, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.023218531161546707, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.020267833024263382, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.02002338506281376, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.018573742359876633, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.01842588186264038, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.012231498956680298, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.01236443780362606, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.011890718713402748, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.00837226863950491, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.023218531161546707, - "qparams": { - "group_size": 128, + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, "bits": [ - 5 + 4, + 3 ], "bits_prop": [ - 1.0 + 0.25, + 0.75 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.023218531161546707, - "qparams": { - "group_size": 128, + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, "bits": [ - 5 + 8, + 4 ], "bits_prop": [ - 1.0 + 0.05, + 0.95 ], "scale_bits": 4 } - } - }, - { - "key": "model.layers.8.mlp.up_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.23375041782855988, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.2204345315694809, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.21607647836208344, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.19664570689201355, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.11063794046640396, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.10597546398639679, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.12296073138713837, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.11326900869607925, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.11157418042421341, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.09926110506057739, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.09494776278734207, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.06267832964658737, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.05420513451099396, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.05299696326255798, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.0527275986969471, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03133632242679596, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.027226092293858528, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.026920676231384277, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.02491748332977295, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.024740036576986313, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.01644907146692276, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.016336379572749138, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.016041696071624756, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.01077625434845686, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03133632242679596, - "qparams": { - "group_size": 128, + }, + { + "accuracy": 0.9721494913101196, + "total_bits": 728020576, + "gate_proj": { + "group_size": { + "4": 128 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03133632242679596, - "qparams": { - "group_size": 128, + }, + "up_proj": { + "group_size": { + "4": 32 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.8.mlp.down_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1789376395089284, - "total_bits": 127947775.99999999, - "err": 0.2198585718870163, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 139482112.0, - "err": 0.19646908342838287, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 154162176.0, - "err": 0.18719089031219482, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7235804966517856, - "total_bits": 159929344.0, - "err": 0.16610123217105865, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2235804966517856, - "total_bits": 189289472.0, - "err": 0.10097676515579224, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7146519252232144, - "total_bits": 218125312.0, - "err": 0.09186068177223206, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 178001152.0, - "err": 0.11987590044736862, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 183522304.0, - "err": 0.10873015224933624, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1789376395089284, - "total_bits": 186668032.0, - "err": 0.10374748706817627, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5271519252232144, - "total_bits": 207115264.0, - "err": 0.0860825628042221, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6608973911830356, - "total_bits": 214968832.0, - "err": 0.08169867098331451, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 236721408.0, - "err": 0.06114070117473602, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 242242560.0, - "err": 0.052208948880434036, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.223580496651786, - "total_bits": 248009728.0, - "err": 0.04873737320303917, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.321794782366071, - "total_bits": 253776896.0, - "err": 0.04789552092552185, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.03093086928129196, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.223580496651786, - "total_bits": 306729984.0, - "err": 0.025808783248066902, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.339651925223214, - "total_bits": 313545728.0, - "err": 0.02538967691361904, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.527151925223214, - "total_bits": 324555776.0, - "err": 0.02294345013797283, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.723580496651786, - "total_bits": 336090112.0, - "err": 0.022403227165341377, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 354161920.0, - "err": 0.01710965856909752, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 359683072.0, - "err": 0.01698688417673111, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.227770124162946, - "total_bits": 365696256.0, - "err": 0.015980111435055733, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 477123584.0, - "err": 0.012291481718420982, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.03093086928129196, - "qparams": { - "group_size": 128, + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, "bits": [ - 5 + 8, + 4 ], "bits_prop": [ - 1.0 + 0.05, + 0.95 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.03093086928129196, - "qparams": { - "group_size": 128, + { + "accuracy": 0.975082516670227, + "total_bits": 738755584, + "gate_proj": { + "group_size": { + "4": 32 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.9.self_attn.q_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.09009096026420593, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.08002641052007675, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.07438887655735016, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.06581645458936691, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.04149125516414642, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.03678138181567192, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.0519937239587307, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.047300707548856735, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.042571429163217545, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.035015348345041275, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.033483102917671204, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.026451583951711655, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.022663436830043793, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.020089706405997276, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.019442273303866386, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.013278055936098099, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.010677834041416645, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.01021368708461523, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.009431484155356884, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.00901775062084198, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.007075635716319084, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.0072607435286045074, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.006166908890008926, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.0051637557335197926, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.035015348345041275, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.035015348345041275, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.9.self_attn.k_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.07617826014757156, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.06687824428081512, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.061358433216810226, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.054201602935791016, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.034851767122745514, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.03024376556277275, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.044595714658498764, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.04048856347799301, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.03585965931415558, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.029104145243763924, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.027952127158641815, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.022644706070423126, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.01937221549451351, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.016761166974902153, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.01609662175178528, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.011321286670863628, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.008720871061086655, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.008241374976933002, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.007583795115351677, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.0071486253291368484, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.005899570416659117, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.0057764556258916855, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.004976903088390827, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.003774403128772974, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.03585965931415558, - "qparams": { - "group_size": 32, + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, "bits": [ - 4, - 3 + 8, + 4 ], "bits_prop": [ 0.05, @@ -29973,7842 +1564,514 @@ "scale_bits": 4 } }, - "best_option": { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.03585965931415558, - "qparams": { - "group_size": 32, + { + "accuracy": 0.9739482402801514, + "total_bits": 750822496, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, "bits": [ - 4, - 3 + 5, + 4 ], "bits_prop": [ - 0.05, - 0.95 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.9.self_attn.v_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.1871263086795807, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.16960568726062775, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.16322238743305206, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.14372588694095612, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.08724983036518097, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.0807315856218338, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.10011196881532669, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.09199172258377075, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.08869960904121399, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.07401558756828308, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.06958282738924026, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.05080205202102661, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.04384571686387062, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.04169647395610809, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.04118496924638748, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.025351714342832565, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.021174337714910507, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.020696479827165604, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.018444320186972618, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.0180991031229496, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.01299935020506382, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.0125667043030262, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.012208282947540283, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.007637053728103638, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.025351714342832565, - "qparams": { - "group_size": 128, + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, "bits": [ - 5 + 5, + 4 ], "bits_prop": [ - 1.0 + 0.25, + 0.75 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.025351714342832565, - "qparams": { - "group_size": 128, + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, "bits": [ - 5 + 8, + 5, + 4 ], "bits_prop": [ - 1.0 + 0.05, + 0.1, + 0.85 ], "scale_bits": 4 } - } - }, - { - "key": "model.layers.9.self_attn.o_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.20860640704631805, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.18637320399284363, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.17799516022205353, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.15255501866340637, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.09802111238241196, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.08951906859874725, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.11424871534109116, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.10412532836198807, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.09971048682928085, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.08021264523267746, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.07526807487010956, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.058699317276477814, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.05003226175904274, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.04724964126944542, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.04657333344221115, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.02934478037059307, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.02463795617222786, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.024019673466682434, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.02109411545097828, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.020656980574131012, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.015505259856581688, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.015626801177859306, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.01457528118044138, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.010812346823513508, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.02934478037059307, - "qparams": { - "group_size": 128, + }, + { + "accuracy": 0.9769377708435059, + "total_bits": 765297664, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, "bits": [ - 5 + 5, + 4 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.02934478037059307, - "qparams": { - "group_size": 128, + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, "bits": [ - 5 + 5, + 4 ], "bits_prop": [ - 1.0 + 0.25, + 0.75 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.9.mlp.gate_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.1682402640581131, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.15796580910682678, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.1544819325208664, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.14040736854076385, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.07942131161689758, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.07576728612184525, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.08874957263469696, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.08170192688703537, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.0802004486322403, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.07101985067129135, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.06803274154663086, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.04532046616077423, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.039189305156469345, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.03815465420484543, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.03791126236319542, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.0226895771920681, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.019862327724695206, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.019617708399891853, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.01819382980465889, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.01804029755294323, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.0121063943952322, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.01234073843806982, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.011758576147258282, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.008620773442089558, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.0226895771920681, - "qparams": { - "group_size": 128, + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, "bits": [ - 5 + 8, + 5, + 4 ], "bits_prop": [ - 1.0 + 0.05, + 0.1, + 0.85 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.0226895771920681, - "qparams": { - "group_size": 128, + { + "accuracy": 0.9864273071289062, + "total_bits": 923968608, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, "bits": [ + 6, 5 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.9.mlp.up_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.22845380008220673, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.21466903388500214, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.2101544290781021, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.19103367626667023, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.10793954879045486, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.1030341386795044, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.12030277401208878, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.11067797243595123, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.108925960958004, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.09651694446802139, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.0924186035990715, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.061469487845897675, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.05303145945072174, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.051765911281108856, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.05147610604763031, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.030779406428337097, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.02673053927719593, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.026412658393383026, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.024427765980362892, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.02424030192196369, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.016400881111621857, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.01625746302306652, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.015985339879989624, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.010978642851114273, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.030779406428337097, - "qparams": { - "group_size": 128, + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, "bits": [ + 6, 5 ], "bits_prop": [ - 1.0 + 0.25, + 0.75 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.030779406428337097, - "qparams": { - "group_size": 128, + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, "bits": [ + 8, + 6, 5 ], "bits_prop": [ - 1.0 + 0.05, + 0.1, + 0.85 ], "scale_bits": 4 } - } - }, - { - "key": "model.layers.9.mlp.down_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1789376395089284, - "total_bits": 127947775.99999999, - "err": 0.22201575338840485, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 139482112.0, - "err": 0.19887450337409973, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 154162176.0, - "err": 0.18958260118961334, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7235804966517856, - "total_bits": 159929344.0, - "err": 0.16793224215507507, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2235804966517856, - "total_bits": 189289472.0, - "err": 0.10201270133256912, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7146519252232144, - "total_bits": 218125312.0, - "err": 0.09301034361124039, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 178001152.0, - "err": 0.12145183980464935, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 183522304.0, - "err": 0.11000208556652069, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1789376395089284, - "total_bits": 186668032.0, - "err": 0.10470087081193924, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5271519252232144, - "total_bits": 207115264.0, - "err": 0.08711262792348862, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6608973911830356, - "total_bits": 214968832.0, - "err": 0.08250034600496292, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 236721408.0, - "err": 0.061731331050395966, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 242242560.0, - "err": 0.052770137786865234, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.223580496651786, - "total_bits": 248009728.0, - "err": 0.04917185381054878, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.321794782366071, - "total_bits": 253776896.0, - "err": 0.04829474538564682, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.031123347580432892, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.223580496651786, - "total_bits": 306729984.0, - "err": 0.025938110426068306, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.339651925223214, - "total_bits": 313545728.0, - "err": 0.02550220675766468, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.527151925223214, - "total_bits": 324555776.0, - "err": 0.023050636053085327, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.723580496651786, - "total_bits": 336090112.0, - "err": 0.02249043434858322, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 354161920.0, - "err": 0.01705286279320717, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 359683072.0, - "err": 0.01694849506020546, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.227770124162946, - "total_bits": 365696256.0, - "err": 0.015872914344072342, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 477123584.0, - "err": 0.012103382498025894, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.031123347580432892, - "qparams": { - "group_size": 128, + }, + { + "accuracy": 0.9869279265403748, + "total_bits": 938443776, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, "bits": [ + 6, 5 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.031123347580432892, - "qparams": { - "group_size": 128, + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, "bits": [ + 6, 5 ], "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.10.self_attn.q_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.08165805041790009, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.0729716494679451, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.06822893768548965, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.05987631902098656, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.03782535344362259, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.033767081797122955, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.04660239443182945, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.04252758249640465, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.03862399980425835, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.03176198899745941, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.030167674645781517, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.023638572543859482, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.020327480509877205, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.018216239288449287, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.017688613384962082, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.011831102892756462, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.009521874599158764, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.009120048955082893, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.008327084593474865, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.007977988570928574, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.006221448536962271, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.006240727845579386, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.005467592738568783, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.004217624664306641, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.03176198899745941, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.03176198899745941, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 + 0.25, + 0.75 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.10.self_attn.k_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.07238767296075821, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.06404079496860504, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.059501927345991135, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.05201229453086853, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.03323836624622345, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.029336150735616684, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.041390687227249146, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.037730950862169266, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.03407005965709686, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.027692576870322227, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.02627473883330822, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.02099590189754963, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.018007660284638405, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.01598266139626503, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.015458307228982449, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.010497494600713253, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.008256006054580212, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.007866589352488518, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.007140170317143202, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.006790434941649437, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.005436994601041079, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.005332096479833126, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.004704679362475872, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.003417506581172347, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.03407005965709686, - "qparams": { - "group_size": 32, + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, "bits": [ - 4, - 3 + 8, + 6, + 5 ], "bits_prop": [ 0.05, - 0.95 + 0.1, + 0.85 ], "scale_bits": 4 } }, - "best_option": { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.03407005965709686, - "qparams": { - "group_size": 32, + { + "accuracy": 0.9913632273674011, + "total_bits": 1068803168, + "gate_proj": { + "group_size": { + "6": 128 + }, "bits": [ - 4, - 3 + 6 ], "bits_prop": [ - 0.05, - 0.95 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.10.self_attn.v_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.17798054218292236, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.1600957214832306, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.15352648496627808, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.1342555582523346, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.08303304016590118, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.07623279839754105, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.0962999165058136, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.08780865371227264, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.08453303575515747, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.06960155814886093, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.06535278260707855, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.04903561994433403, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.04195966199040413, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.039750583469867706, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.03921207785606384, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.0244692824780941, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.020323684439063072, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.019808156415820122, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.017586002126336098, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.0172260832041502, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.012636547908186913, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.012301163747906685, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.011755874380469322, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.00775559339672327, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.0244692824780941, - "qparams": { - "group_size": 128, + }, + "up_proj": { + "group_size": { + "6": 128 + }, "bits": [ - 5 + 6 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.0244692824780941, - "qparams": { - "group_size": 128, + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, "bits": [ - 5 + 8, + 6 ], "bits_prop": [ - 1.0 + 0.05, + 0.95 ], "scale_bits": 4 } - } - }, - { - "key": "model.layers.10.self_attn.o_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.19533640146255493, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.17521187663078308, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.16709434986114502, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.1461392641067505, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.0913165956735611, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.08339394629001617, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.10762929916381836, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.09784355759620667, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.09296060353517532, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.07659115642309189, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.07247483730316162, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.05532138794660568, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.04715553671121597, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.04418802261352539, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.04348871484398842, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.027742702513933182, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.023372959345579147, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.022753972560167313, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.02050374634563923, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.020050054416060448, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.014946410432457924, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.015322773717343807, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.01398971676826477, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.01108284667134285, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.027742702513933182, - "qparams": { - "group_size": 128, + }, + { + "accuracy": 0.9920967817306519, + "total_bits": 1109664000, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, "bits": [ - 5 + 8, + 6 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.027742702513933182, - "qparams": { - "group_size": 128, + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, "bits": [ - 5 + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 ], "bits_prop": [ - 1.0 + 0.15, + 0.85 ], "scale_bits": 4 } - } - }, - { - "key": "model.layers.10.mlp.gate_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.16422820091247559, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.15338069200515747, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.1495712548494339, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.1354864239692688, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.07732786238193512, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.07343574613332748, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.08690424263477325, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.07993586361408234, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.07816450297832489, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.06867309659719467, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.06558573246002197, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.044368378818035126, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.03833111375570297, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.0371551550924778, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.03688054904341698, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.022227786481380463, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.019368674606084824, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.01910073310136795, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.017657138407230377, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.017480261623859406, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.011854828335344791, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.012097010388970375, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.011443568393588066, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.008474614471197128, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.0371551550924778, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.03688054904341698, - "qparams": { - "group_size": 32, + }, + { + "accuracy": 0.9937444925308228, + "total_bits": 1209278720, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, "bits": [ - 6, - 4 + 8, + 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.10.mlp.up_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.21859142184257507, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.20433735847473145, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.19949449598789215, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.18070125579833984, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.10297174751758575, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.09791138768196106, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.11530784517526627, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.1061035618185997, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.10408297181129456, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.09141291677951813, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.08725422620773315, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.05878901109099388, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.05075101554393768, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.049343667924404144, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.04900375381112099, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.029377611353993416, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.025394799187779427, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.025051873177289963, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.023055529221892357, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.022848909720778465, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.015425657853484154, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.015368206426501274, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.014930748380720615, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.01019855123013258, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.029377611353993416, - "qparams": { - "group_size": 128, + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, "bits": [ - 5 + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.029377611353993416, - "qparams": { - "group_size": 128, + { + "accuracy": 0.9946379065513611, + "total_bits": 1414799616, + "gate_proj": { + "group_size": { + "8": 128 + }, "bits": [ - 5 + 8 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.10.mlp.down_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1789376395089284, - "total_bits": 127947775.99999999, - "err": 0.22911971807479858, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 139482112.0, - "err": 0.205362468957901, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 154162176.0, - "err": 0.19585339725017548, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7235804966517856, - "total_bits": 159929344.0, - "err": 0.1733909696340561, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2235804966517856, - "total_bits": 189289472.0, - "err": 0.10565442591905594, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7146519252232144, - "total_bits": 218125312.0, - "err": 0.0963311418890953, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 178001152.0, - "err": 0.1256428062915802, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 183522304.0, - "err": 0.11360473930835724, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1789376395089284, - "total_bits": 186668032.0, - "err": 0.10838636010885239, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5271519252232144, - "total_bits": 207115264.0, - "err": 0.09023305773735046, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6608973911830356, - "total_bits": 214968832.0, - "err": 0.0854048803448677, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 236721408.0, - "err": 0.06411357969045639, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 242242560.0, - "err": 0.05469674617052078, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.223580496651786, - "total_bits": 248009728.0, - "err": 0.0510561503469944, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.321794782366071, - "total_bits": 253776896.0, - "err": 0.050181180238723755, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.03233565762639046, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.223580496651786, - "total_bits": 306729984.0, - "err": 0.027143089100718498, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.339651925223214, - "total_bits": 313545728.0, - "err": 0.026707779616117477, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.527151925223214, - "total_bits": 324555776.0, - "err": 0.024191709235310555, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.723580496651786, - "total_bits": 336090112.0, - "err": 0.02362758480012417, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 354161920.0, - "err": 0.017768006771802902, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 359683072.0, - "err": 0.01799161545932293, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.227770124162946, - "total_bits": 365696256.0, - "err": 0.016583044081926346, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 477123584.0, - "err": 0.013151570223271847, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.03233565762639046, - "qparams": { - "group_size": 128, + }, + "up_proj": { + "group_size": { + "8": 128 + }, "bits": [ - 5 + 8 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.03233565762639046, - "qparams": { - "group_size": 128, + }, + "down_proj": { + "group_size": { + "8": 128 + }, "bits": [ - 5 + 8 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 } } - }, - { - "key": "model.layers.11.self_attn.q_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.09256531298160553, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.08248152583837509, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.07718408852815628, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.06761594116687775, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.04292650148272514, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.03834297135472298, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.05268257483839989, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.04801834002137184, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.04381653293967247, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.035913772881031036, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.03407302126288414, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.026785992085933685, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.02300245128571987, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.020742926746606827, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.020175818353891373, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.013439027592539787, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.01093963161110878, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.01050390675663948, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.009579991921782494, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.009207530878484249, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.00713398028165102, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.007280352991074324, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.006333521567285061, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.005103792063891888, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.035913772881031036, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.035913772881031036, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 + ], + "model.layers.1.self_attn": [ + { + "accuracy": 0.8756943941116333, + "total_bits": 89141248, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.11.self_attn.k_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.07648559659719467, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.06777312606573105, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.06254742294549942, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.05464871972799301, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.03519495949149132, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.030893215909600258, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.0443708673119545, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.04049773886799812, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.03600326552987099, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.029340432956814766, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.027941228821873665, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.02255171537399292, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.019355637952685356, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.01694457046687603, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.01633831113576889, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.011272897012531757, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.008865037932991982, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.008415616117417812, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.007710602600127459, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.007308509666472673, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.005909684579819441, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.0058938125148415565, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.005066485609859228, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.003948786295950413, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.03600326552987099, - "qparams": { - "group_size": 32, + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, "bits": [ - 4, - 3 + 3, + 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.03600326552987099, - "qparams": { - "group_size": 32, + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, "bits": [ - 4, - 3 + 3, + 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.11.self_attn.v_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.18400989472866058, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.16398829221725464, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.15640763938426971, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.1359318643808365, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.08528269827365875, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.07760187238454819, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.10015816241502762, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.09069626033306122, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.08692186325788498, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.0707087442278862, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.06621330976486206, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.050932884216308594, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.04331061616539955, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.04080544412136078, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.04022245109081268, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.025471573695540428, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.0208720862865448, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.020284580066800117, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.01791498437523842, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.017499199137091637, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.013129566796123981, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.012685353867709637, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.012043654918670654, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.008007937110960484, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.025471573695540428, - "qparams": { - "group_size": 128, + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, "bits": [ - 5 + 3, + 2 ], "bits_prop": [ - 1.0 + 0.05, + 0.95 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.025471573695540428, - "qparams": { - "group_size": 128, + { + "accuracy": 0.8898072242736816, + "total_bits": 91697152, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, "bits": [ - 5 + 3, + 2 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.11.self_attn.o_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.19157981872558594, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.17439551651477814, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.1601950228214264, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.14270630478858948, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.09044171124696732, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.07964067906141281, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.12074490636587143, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.10761331021785736, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.09194907546043396, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.07831661403179169, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.07562781125307083, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.062413450330495834, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.052083730697631836, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.04418902471661568, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.042162396013736725, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.031281135976314545, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.02384069189429283, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.022553591057658195, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.02153927832841873, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.02029774710536003, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.016758406534790993, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.01698174700140953, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.014016001485288143, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.012322905473411083, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.031281135976314545, - "qparams": { - "group_size": 128, + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, "bits": [ - 5 + 3, + 2 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.031281135976314545, - "qparams": { - "group_size": 128, + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, "bits": [ - 5 + 3, + 2 ], "bits_prop": [ - 1.0 + 0.25, + 0.75 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.11.mlp.gate_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.16454891860485077, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.15342183411121368, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.14934693276882172, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.13522177934646606, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.07750845700502396, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.07341757416725159, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.08753129839897156, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.0804017186164856, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.07839496433734894, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.0687098279595375, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.06569962203502655, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.04478991776704788, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.03864258900284767, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.03734607622027397, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.03704056516289711, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.022458970546722412, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.019631095230579376, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.019343199208378792, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.01791056990623474, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.017720647156238556, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.01209291722625494, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.012507294304668903, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.01165094319730997, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.009002767503261566, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.03704056516289711, - "qparams": { - "group_size": 32, + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, "bits": [ - 6, - 4 + 3, + 2 ], "bits_prop": [ 0.1, @@ -37817,9693 +2080,478 @@ "scale_bits": 4 } }, - "best_option": { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.03704056516289711, - "qparams": { - "group_size": 32, + { + "accuracy": 0.898164689540863, + "total_bits": 95234560, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, "bits": [ - 6, - 4 + 3, + 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.11.mlp.up_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.21814629435539246, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.20374926924705505, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.19874177873134613, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.17976373434066772, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.10270601511001587, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.09750563651323318, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.11536982655525208, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.10608523339033127, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.10384249687194824, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.09107978641986847, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.08691620826721191, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.058735523372888565, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.05072291940450668, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.04920157417654991, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.04883977770805359, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.029376858845353127, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.025290589779615402, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.024926602840423584, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.022932343184947968, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.02270142361521721, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.015418082475662231, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.015277484431862831, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.014879638329148293, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.01006197091192007, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.029376858845353127, - "qparams": { - "group_size": 128, + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, "bits": [ - 5 + 3, + 2 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.029376858845353127, - "qparams": { - "group_size": 128, + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, "bits": [ - 5 + 4, + 3 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.11.mlp.down_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1789376395089284, - "total_bits": 127947775.99999999, - "err": 0.22902558743953705, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 139482112.0, - "err": 0.2057061493396759, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 154162176.0, - "err": 0.19641238451004028, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7235804966517856, - "total_bits": 159929344.0, - "err": 0.17412711679935455, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2235804966517856, - "total_bits": 189289472.0, - "err": 0.10589486360549927, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7146519252232144, - "total_bits": 218125312.0, - "err": 0.09665459394454956, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 178001152.0, - "err": 0.12561193108558655, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 183522304.0, - "err": 0.1138228103518486, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1789376395089284, - "total_bits": 186668032.0, - "err": 0.10858717560768127, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5271519252232144, - "total_bits": 207115264.0, - "err": 0.09070882946252823, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6608973911830356, - "total_bits": 214968832.0, - "err": 0.08599625527858734, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 236721408.0, - "err": 0.06442438811063766, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 242242560.0, - "err": 0.05485786125063896, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.223580496651786, - "total_bits": 248009728.0, - "err": 0.05122650787234306, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.321794782366071, - "total_bits": 253776896.0, - "err": 0.05032428354024887, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.03250904381275177, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.223580496651786, - "total_bits": 306729984.0, - "err": 0.027241544798016548, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.339651925223214, - "total_bits": 313545728.0, - "err": 0.02680133655667305, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.527151925223214, - "total_bits": 324555776.0, - "err": 0.024336518719792366, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.723580496651786, - "total_bits": 336090112.0, - "err": 0.02377927675843239, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 354161920.0, - "err": 0.017805464565753937, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 359683072.0, - "err": 0.018069954589009285, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.227770124162946, - "total_bits": 365696256.0, - "err": 0.016572795808315277, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 477123584.0, - "err": 0.013240179978311062, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.03250904381275177, - "qparams": { - "group_size": 128, + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, "bits": [ - 5 + 3, + 2 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.03250904381275177, - "qparams": { - "group_size": 128, + { + "accuracy": 0.9365696310997009, + "total_bits": 111748096, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, "bits": [ - 5 + 3, + 2 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.12.self_attn.q_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.09696802496910095, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.0860891193151474, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.07978169620037079, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.06989629566669464, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.04489625617861748, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.03959231078624725, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.05721607431769371, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.05135607346892357, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.04595252871513367, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.037587013095617294, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.03607519716024399, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.029251258820295334, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.024723002687096596, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.021758705377578735, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.021015817299485207, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.014723792672157288, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.011604777537286282, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.011075926013290882, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.010206676088273525, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.009728492237627506, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.007893378846347332, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.007956371642649174, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.0067914193496108055, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.0056951288133859634, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.03607519716024399, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.03607519716024399, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.12.self_attn.k_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.08210816979408264, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.07231788337230682, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.06651092320680618, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.058169372379779816, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.037635497748851776, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.03285747393965721, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.048335328698158264, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.04347898066043854, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.03860028088092804, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.03128273785114288, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.02996285818517208, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.024569429457187653, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.020825369283556938, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.018188055604696274, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.017524879425764084, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.012324049137532711, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.00957796722650528, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.009089161641895771, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.00834114570170641, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.007895627990365028, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.006480294279754162, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.006457718554884195, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.005511770956218243, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.00442088395357132, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.03128273785114288, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.03128273785114288, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.12.self_attn.v_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.1839308887720108, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.165375754237175, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.15796694159507751, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.13841095566749573, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.08580884337425232, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.07847429811954498, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.10148054361343384, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.09160921722650528, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.08740480989217758, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.07198423892259598, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.06779872626066208, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.05163339897990227, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.04375246539711952, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.04111016169190407, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.040452372282743454, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.02584659308195114, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.021006248891353607, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.020425982773303986, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.018199870362877846, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.017762769013643265, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.013341381214559078, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.012777009978890419, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.012231973931193352, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.008029506541788578, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.02584659308195114, - "qparams": { - "group_size": 128, + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, "bits": [ - 5 + 4, + 3 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.02584659308195114, - "qparams": { - "group_size": 128, + { + "accuracy": 0.9377191066741943, + "total_bits": 132388864, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, "bits": [ - 5 + 4, + 3 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.12.self_attn.o_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.2073022872209549, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.1792462170124054, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.16817495226860046, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.14666904509067535, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.09679964929819107, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.08551155775785446, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.1165609136223793, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.1052284687757492, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.09924555569887161, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.07773241400718689, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.07410319149494171, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.06024205684661865, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.05093195661902428, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.04709191620349884, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.04613867402076721, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.03028927557170391, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.025085555389523506, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.02425750531256199, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.021407220512628555, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.020784510299563408, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.016496360301971436, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.016771158203482628, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.015272385440766811, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.012272384949028492, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.03028927557170391, - "qparams": { - "group_size": 128, + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, "bits": [ - 5 + 4, + 3 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.03028927557170391, - "qparams": { - "group_size": 128, + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, "bits": [ - 5 + 4, + 3 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.12.mlp.gate_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.16893884539604187, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.1572493016719818, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.15319813787937164, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.13862638175487518, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.07966563105583191, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.0754377469420433, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.08988645672798157, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.08244433254003525, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.08064532279968262, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.07052428275346756, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.0674358680844307, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.0460418239235878, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.03970571607351303, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.03844866901636124, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.03815239295363426, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.02310613915324211, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.020300818607211113, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.020017750561237335, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.018519306555390358, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.018332703039050102, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.012547009624540806, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.013041900470852852, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.012133525684475899, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.009527052752673626, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.02310613915324211, - "qparams": { - "group_size": 128, + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, "bits": [ - 5 + 4, + 3 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.02310613915324211, - "qparams": { - "group_size": 128, + { + "accuracy": 0.9404224157333374, + "total_bits": 132455936, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, "bits": [ - 5 + 4, + 3 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.12.mlp.up_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.22701063752174377, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.21154147386550903, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.20641139149665833, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.18676653504371643, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.10692194849252701, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.10144565999507904, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.11970797926187515, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.11017216742038727, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.1081414520740509, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.09463401138782501, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.0902877151966095, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.061100251972675323, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.052729375660419464, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.05127517879009247, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.050924576818943024, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03053792379796505, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.026453301310539246, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.026089267805218697, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.023969076573848724, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.023749718442559242, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.016098644584417343, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.01610187068581581, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.015599003992974758, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.010819620452821255, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03053792379796505, - "qparams": { - "group_size": 128, + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, "bits": [ - 5 + 4, + 3 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03053792379796505, - "qparams": { - "group_size": 128, + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, "bits": [ - 5 + 4, + 3 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.12.mlp.down_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1789376395089284, - "total_bits": 127947775.99999999, - "err": 0.23076394200325012, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 139482112.0, - "err": 0.2059362232685089, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 154162176.0, - "err": 0.19512498378753662, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7235804966517856, - "total_bits": 159929344.0, - "err": 0.17229154706001282, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2235804966517856, - "total_bits": 189289472.0, - "err": 0.10630594938993454, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7146519252232144, - "total_bits": 218125312.0, - "err": 0.09608884900808334, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 178001152.0, - "err": 0.12835605442523956, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 183522304.0, - "err": 0.1160779595375061, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1789376395089284, - "total_bits": 186668032.0, - "err": 0.10928419977426529, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5271519252232144, - "total_bits": 207115264.0, - "err": 0.09031403064727783, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6608973911830356, - "total_bits": 214968832.0, - "err": 0.08556602895259857, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 236721408.0, - "err": 0.06545372307300568, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 242242560.0, - "err": 0.05578697472810745, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.223580496651786, - "total_bits": 248009728.0, - "err": 0.05139187350869179, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.321794782366071, - "total_bits": 253776896.0, - "err": 0.05031117796897888, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.03304077312350273, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.223580496651786, - "total_bits": 306729984.0, - "err": 0.02731948159635067, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.339651925223214, - "total_bits": 313545728.0, - "err": 0.026778310537338257, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.527151925223214, - "total_bits": 324555776.0, - "err": 0.02425781637430191, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.723580496651786, - "total_bits": 336090112.0, - "err": 0.023570120334625244, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 354161920.0, - "err": 0.01808794215321541, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 359683072.0, - "err": 0.018215397372841835, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.227770124162946, - "total_bits": 365696256.0, - "err": 0.01659032516181469, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 477123584.0, - "err": 0.013226144015789032, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.03304077312350273, - "qparams": { - "group_size": 128, + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, "bits": [ - 5 + 4, + 3 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.03304077312350273, - "qparams": { - "group_size": 128, + { + "accuracy": 0.9539521336555481, + "total_bits": 169089024, + "q_proj": { + "group_size": { + "4": 128 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.13.self_attn.q_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.10071555525064468, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.0905812606215477, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.085720494389534, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.07542254775762558, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.04681765288114548, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.0424637570977211, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.05599434673786163, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.05121631175279617, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.047668300569057465, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.03947388753294945, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.03737813979387283, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.028430940583348274, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.02447776310145855, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.02249571494758129, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.022008627653121948, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.014217679388821125, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.01169305294752121, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.011306488886475563, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.010241103358566761, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.009922895580530167, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.007454362697899342, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.007477130740880966, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.006741549354046583, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.00503388699144125, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.028430940583348274, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.028430940583348274, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.13.self_attn.k_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.08155141770839691, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.07326892018318176, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.06863442808389664, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.06036997586488724, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.03768067806959152, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.03384306654334068, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.0462709404528141, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.04228004068136215, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.03843000903725624, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.031828369945287704, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.0302627831697464, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.023428767919540405, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.020153893157839775, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.018113117665052414, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.01760602556169033, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.011715786531567574, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.009371710009872913, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.008989541791379452, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.008190678432583809, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.007853267714381218, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.0060961078852415085, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.0060110692866146564, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.005375642329454422, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.003901520511135459, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.031828369945287704, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.031828369945287704, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.13.self_attn.v_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.20136982202529907, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.1811285763978958, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.17392760515213013, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.15250341594219208, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.09363549947738647, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.0861654058098793, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.10842204838991165, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.0986674576997757, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.09532827883958817, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.07862979918718338, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.07394837588071823, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.055102623999118805, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.04705934599041939, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.044794633984565735, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.04424213618040085, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.027495665475726128, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.022773904725909233, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.022223707288503647, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.019677335396409035, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.019310664385557175, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.014117000624537468, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.01355376560240984, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.013139455579221249, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.008292957209050655, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.027495665475726128, - "qparams": { - "group_size": 128, + }, + "k_proj": { + "group_size": { + "4": 128 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.027495665475726128, - "qparams": { - "group_size": 128, + }, + "v_proj": { + "group_size": { + "4": 128 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.13.self_attn.o_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.22471027076244354, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.20135818421840668, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.19198618829250336, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.1718219369649887, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.10489698499441147, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.09516110271215439, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.12332937866449356, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.11248167604207993, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.10708155483007431, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.0890408530831337, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.08521190285682678, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.06343057006597519, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.0542764887213707, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.05081728845834732, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.049970593303442, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.031828250735998154, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.026971060782670975, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.026197023689746857, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.023899953812360764, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.023375853896141052, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.017148206010460854, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.017808716744184494, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.016025351360440254, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.013015106320381165, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.031828250735998154, - "qparams": { - "group_size": 128, + }, + "o_proj": { + "group_size": { + "4": 128 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.031828250735998154, - "qparams": { - "group_size": 128, + { + "accuracy": 0.9577215313911438, + "total_bits": 169221632, + "q_proj": { + "group_size": { + "4": 128 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.13.mlp.gate_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.17141105234622955, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.15961390733718872, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.15535522997379303, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.1407216340303421, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.08086587488651276, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.0765085220336914, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.09134896844625473, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.08388948440551758, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.08183471858501434, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.07161029428243637, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.06845061480998993, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.04679650440812111, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.040355369448661804, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.03899480029940605, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.038669321686029434, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.023440314456820488, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.020544646307826042, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.020236443728208542, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.018735496327280998, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.018535327166318893, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.012627656571567059, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.013160338625311852, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.01215687021613121, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.009538723155856133, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.023440314456820488, - "qparams": { - "group_size": 128, + }, + "k_proj": { + "group_size": { + "4": 128 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.023440314456820488, - "qparams": { - "group_size": 128, + }, + "v_proj": { + "group_size": { + "4": 64 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.13.mlp.up_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.23062290251255035, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.21504519879817963, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.209773451089859, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.18987393379211426, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.10869430750608444, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.1031094416975975, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.1219218447804451, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.11207448691129684, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.10988393425941467, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.09625429660081863, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.09177825599908829, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.06220525875687599, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.05364011600613594, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.05211084708571434, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.05175478011369705, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03109717182815075, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.026865331456065178, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.0264891404658556, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.024348322302103043, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.0241183303296566, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.01639227196574211, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.016317754983901978, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.01585400104522705, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.010898971930146217, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03109717182815075, - "qparams": { - "group_size": 128, + }, + "o_proj": { + "group_size": { + "4": 128 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03109717182815075, - "qparams": { - "group_size": 128, + { + "accuracy": 0.9636719226837158, + "total_bits": 170671104, + "q_proj": { + "group_size": { + "4": 64 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.13.mlp.down_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1789376395089284, - "total_bits": 127947775.99999999, - "err": 0.2405892312526703, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 139482112.0, - "err": 0.21384651958942413, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 154162176.0, - "err": 0.20280997455120087, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7235804966517856, - "total_bits": 159929344.0, - "err": 0.17851442098617554, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2235804966517856, - "total_bits": 189289472.0, - "err": 0.11076708137989044, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7146519252232144, - "total_bits": 218125312.0, - "err": 0.09994419664144516, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 178001152.0, - "err": 0.13295471668243408, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 183522304.0, - "err": 0.12034018337726593, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1789376395089284, - "total_bits": 186668032.0, - "err": 0.11401470750570297, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5271519252232144, - "total_bits": 207115264.0, - "err": 0.09363088756799698, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6608973911830356, - "total_bits": 214968832.0, - "err": 0.08865727484226227, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 236721408.0, - "err": 0.0680796429514885, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 242242560.0, - "err": 0.05790499225258827, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.223580496651786, - "total_bits": 248009728.0, - "err": 0.053525540977716446, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.321794782366071, - "total_bits": 253776896.0, - "err": 0.05245383828878403, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.03434644266963005, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.223580496651786, - "total_bits": 306729984.0, - "err": 0.028372574597597122, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.339651925223214, - "total_bits": 313545728.0, - "err": 0.02785709872841835, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.527151925223214, - "total_bits": 324555776.0, - "err": 0.025081340223550797, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.723580496651786, - "total_bits": 336090112.0, - "err": 0.024396976456046104, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 354161920.0, - "err": 0.01879993826150894, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 359683072.0, - "err": 0.01880159229040146, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.227770124162946, - "total_bits": 365696256.0, - "err": 0.017366690561175346, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 477123584.0, - "err": 0.01358238235116005, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.03434644266963005, - "qparams": { - "group_size": 128, + }, + "k_proj": { + "group_size": { + "4": 64 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.03434644266963005, - "qparams": { - "group_size": 128, + }, + "v_proj": { + "group_size": { + "4": 32 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.14.self_attn.q_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.10245812684297562, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.09235779941082001, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.08688779920339584, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.07676247507333755, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.04765729233622551, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.04297928139567375, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.05841492488980293, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.053138863295316696, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.048570141196250916, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.04046044126152992, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.03864114359021187, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.029729176312685013, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.025428524240851402, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.022977637127041817, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.022367088124155998, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.014874840155243874, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.012059287168085575, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.011604007333517075, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.010657493956387043, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.010259810835123062, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.007847926579415798, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.007938781753182411, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.006966258864849806, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.00548553979024291, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.029729176312685013, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.029729176312685013, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.14.self_attn.k_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.08169177174568176, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.07389360666275024, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.06812766939401627, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.06037198379635811, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.03780690208077431, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.03343738242983818, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.048573508858680725, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.044253088533878326, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.038533952087163925, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.03235386312007904, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.03108319640159607, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.024592839181423187, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.021103594452142715, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.018205586820840836, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.017486976459622383, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.012282849289476871, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.009500248357653618, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.009000557474792004, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.008424768224358559, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.007950066588819027, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.00642105657607317, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.006342875771224499, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.0054316287860274315, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.00418210681527853, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.03235386312007904, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.03235386312007904, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 + "bits_prop": [ + 1 ], "scale_bits": 4 } - } - }, - { - "key": "model.layers.14.self_attn.v_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.20660905539989471, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.18667219579219818, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.1788690835237503, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.15729199349880219, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.09636948257684708, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.08859109878540039, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.11356871575117111, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.10221954435110092, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.09797311574220657, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.08139675855636597, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.0767914429306984, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.05798523128032684, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.04884837195277214, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.046124789863824844, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.04548489302396774, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.02897627279162407, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.023648375645279884, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.02306087128818035, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.020619574934244156, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.020168442279100418, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.01497263927012682, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.01442685630172491, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.013725994154810905, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.009240293875336647, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.02897627279162407, - "qparams": { - "group_size": 128, + }, + { + "accuracy": 0.9654274582862854, + "total_bits": 173039616, + "q_proj": { + "group_size": { + "4": 32 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.02897627279162407, - "qparams": { - "group_size": 128, + }, + "k_proj": { + "group_size": { + "4": 32 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.14.self_attn.o_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.2376875877380371, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.2045355886220932, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.19205813109874725, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.17105035483837128, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.11162621527910233, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.09861379116773605, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.1328001320362091, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.11997561156749725, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.11426516622304916, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.08956333994865417, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.08595653623342514, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.06835009157657623, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.0578678697347641, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.05406574532389641, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.053146880120038986, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.03421249985694885, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.028551490977406502, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.02771824784576893, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.024256492033600807, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.0236493032425642, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.018225908279418945, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.018667053431272507, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.01699858531355858, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.013406678102910519, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.03421249985694885, - "qparams": { - "group_size": 128, + }, + "v_proj": { + "group_size": { + "4": 32 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.03421249985694885, - "qparams": { - "group_size": 128, + }, + "o_proj": { + "group_size": { + "4": 32 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 } - } - }, - { - "key": "model.layers.14.mlp.gate_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.1596861481666565, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.14895260334014893, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.14472082257270813, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.13140149414539337, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.07548598945140839, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.07138902693986893, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.08582186698913574, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.07875743508338928, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.07639956474304199, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.06706064194440842, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.06432289630174637, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.0439520962536335, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.037969302386045456, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.036495961248874664, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.03614199906587601, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.022045837715268135, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.019355488941073418, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.01905001327395439, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.017726151272654533, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.01751033030450344, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.011895671486854553, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.012599878013134003, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.011390559375286102, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.00928905513137579, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.036495961248874664, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.036495961248874664, - "qparams": { - "group_size": 32, + }, + { + "accuracy": 0.96928870677948, + "total_bits": 174398976, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, "bits": [ 5, 4 @@ -47513,3225 +2561,140 @@ 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.14.mlp.up_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.2273930162191391, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.21268604695796967, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.20746004581451416, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.1882474720478058, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.1074371263384819, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.10208060592412949, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.12085370719432831, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.11107544600963593, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.10860584676265717, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.09553547203540802, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.09132036566734314, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.06172287091612816, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.05322474613785744, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.05160031467676163, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.05120711773633957, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.030894743278622627, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.02673148363828659, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.026360370218753815, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.024342790246009827, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.02409946732223034, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.01640193909406662, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.01645614579319954, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.015828054398298264, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.011232215911149979, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.030894743278622627, - "qparams": { - "group_size": 128, + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, "bits": [ - 5 + 5, + 4 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.030894743278622627, - "qparams": { - "group_size": 128, + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, "bits": [ - 5 + 5, + 4 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.14.mlp.down_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1789376395089284, - "total_bits": 127947775.99999999, - "err": 0.22967243194580078, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 139482112.0, - "err": 0.20369216799736023, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 154162176.0, - "err": 0.19271503388881683, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7235804966517856, - "total_bits": 159929344.0, - "err": 0.17040996253490448, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2235804966517856, - "total_bits": 189289472.0, - "err": 0.10560467094182968, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7146519252232144, - "total_bits": 218125312.0, - "err": 0.09511011093854904, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 178001152.0, - "err": 0.12874282896518707, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 183522304.0, - "err": 0.11529236286878586, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1789376395089284, - "total_bits": 186668032.0, - "err": 0.10876034945249557, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5271519252232144, - "total_bits": 207115264.0, - "err": 0.08956174552440643, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6608973911830356, - "total_bits": 214968832.0, - "err": 0.08522496372461319, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 236721408.0, - "err": 0.06579460203647614, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 242242560.0, - "err": 0.05571757256984711, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.223580496651786, - "total_bits": 248009728.0, - "err": 0.05131338909268379, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.321794782366071, - "total_bits": 253776896.0, - "err": 0.05021841824054718, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.03346196934580803, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.223580496651786, - "total_bits": 306729984.0, - "err": 0.027689620852470398, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.339651925223214, - "total_bits": 313545728.0, - "err": 0.027157273143529892, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.527151925223214, - "total_bits": 324555776.0, - "err": 0.024677714332938194, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.723580496651786, - "total_bits": 336090112.0, - "err": 0.023997901007533073, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 354161920.0, - "err": 0.0188027061522007, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 359683072.0, - "err": 0.018953600898385048, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.227770124162946, - "total_bits": 365696256.0, - "err": 0.017286187037825584, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 477123584.0, - "err": 0.014282168820500374, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.03346196934580803, - "qparams": { - "group_size": 128, + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, "bits": [ - 5 + 5, + 4 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.03346196934580803, - "qparams": { - "group_size": 128, + { + "accuracy": 0.9719192981719971, + "total_bits": 175225856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, "bits": [ - 5 + 5, + 4 ], "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.15.self_attn.q_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.11593811959028244, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.10556387901306152, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.09980233013629913, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.08857736736536026, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.054099660366773605, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.04923728480935097, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.06621119379997253, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.05996343493461609, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.055014342069625854, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.046521853655576706, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.04446468502283096, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.03372194617986679, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.028783518821001053, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.026160327717661858, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.02551254816353321, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.016969842836260796, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.013826042413711548, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.01333808433264494, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.012348240241408348, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.011938122101128101, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.00904613547027111, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.009182138368487358, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.008074374869465828, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.006499743554741144, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.03372194617986679, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.03372194617986679, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.15.self_attn.k_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.08721649646759033, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.07923328131437302, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.07277140766382217, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.06468330323696136, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.04024588689208031, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.035483673214912415, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.052738215774297714, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.04773329198360443, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.04102274402976036, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.03482592850923538, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.03371570631861687, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.026762157678604126, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.02285088412463665, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.019402483478188515, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.018514664843678474, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.013424094766378403, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.01016910094767809, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.009570864960551262, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.009100627154111862, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.008534416556358337, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.007014062255620956, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.006904172245413065, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.005803526379168034, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.004575809463858604, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.03482592850923538, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.03482592850923538, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.15.self_attn.v_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.22414131462574005, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.2056114375591278, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.19815321266651154, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.17767085134983063, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.10541015863418579, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.0980909988284111, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.12267021089792252, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.1114264726638794, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.1070697158575058, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.09138365089893341, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.08708939701318741, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.062498539686203, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.053305353969335556, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.0505734384059906, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.049881864339113235, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.031275928020477295, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.02581145614385605, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.02521493472158909, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.022928336635231972, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.022489767521619797, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.016118058934807777, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.015568121336400509, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.015010611154139042, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.009775077924132347, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.031275928020477295, - "qparams": { - "group_size": 128, + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, "bits": [ - 5 + 5, + 4 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.031275928020477295, - "qparams": { - "group_size": 128, + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, "bits": [ - 5 + 5, + 4 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.15.self_attn.o_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.21927639842033386, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.19726434350013733, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.18883565068244934, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.1620447188615799, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.10204917192459106, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.09357118606567383, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.11990394443273544, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.10888994485139847, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.10436265170574188, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.0852905660867691, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.07908880710601807, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.06151650473475456, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.052271127700805664, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.04914182797074318, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.04837953671813011, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.030809417366981506, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.02564294822514057, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.024961955845355988, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.022306472063064575, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.021815214306116104, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.016335098072886467, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.01631356030702591, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.015309845097362995, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.011269740760326385, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.030809417366981506, - "qparams": { - "group_size": 128, + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, "bits": [ - 5 + 5, + 4 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.030809417366981506, - "qparams": { - "group_size": 128, + { + "accuracy": 0.9719778299331665, + "total_bits": 178728960, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, "bits": [ - 5 + 5, + 4 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.15.mlp.gate_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.15985344350337982, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.14965850114822388, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.1457061469554901, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.13237455487251282, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.07560217380523682, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.07173927873373032, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.08570526540279388, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.07866722345352173, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.07643868029117584, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.06753623485565186, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.06470981240272522, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.04388384148478508, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.03795023635029793, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.036557313054800034, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.036226481199264526, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.022061698138713837, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.01939924992620945, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.019110720604658127, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.017830103635787964, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.017629332840442657, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.01201019249856472, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.012611942365765572, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.011545242741703987, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.009321235120296478, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.036557313054800034, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.036557313054800034, - "qparams": { - "group_size": 32, + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, "bits": [ 5, 4 @@ -50741,5068 +2704,210 @@ 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.15.mlp.up_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.2316822111606598, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.21770831942558289, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.21277756989002228, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.1933870166540146, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.1095205768942833, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.10448896884918213, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.12273158133029938, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.1128762811422348, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.11061139404773712, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.09798967838287354, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.09378112107515335, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.06257695704698563, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.05402350425720215, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.05249975621700287, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.05214225500822067, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03127190098166466, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.02698861062526703, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.026632340624928474, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.024636702612042427, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.02440585009753704, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.016464488580822945, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.016274036839604378, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.01594630256295204, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.010731959715485573, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03127190098166466, - "qparams": { - "group_size": 128, + }, + "v_proj": { + "group_size": { + "5": 64 + }, "bits": [ 5 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03127190098166466, - "qparams": { - "group_size": 128, + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, "bits": [ - 5 + 5, + 4 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 } - } - }, - { - "key": "model.layers.15.mlp.down_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1789376395089284, - "total_bits": 127947775.99999999, - "err": 0.22509044408798218, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 139482112.0, - "err": 0.19883203506469727, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 154162176.0, - "err": 0.18768078088760376, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7235804966517856, - "total_bits": 159929344.0, - "err": 0.1657494157552719, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2235804966517856, - "total_bits": 189289472.0, - "err": 0.10312990099191666, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7146519252232144, - "total_bits": 218125312.0, - "err": 0.09243009239435196, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 178001152.0, - "err": 0.12469343841075897, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 183522304.0, - "err": 0.11306943744421005, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1789376395089284, - "total_bits": 186668032.0, - "err": 0.1064097136259079, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5271519252232144, - "total_bits": 207115264.0, - "err": 0.08689720928668976, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6608973911830356, - "total_bits": 214968832.0, - "err": 0.082539401948452, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 236721408.0, - "err": 0.06360332667827606, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 242242560.0, - "err": 0.05434774234890938, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.223580496651786, - "total_bits": 248009728.0, - "err": 0.049839261919260025, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.321794782366071, - "total_bits": 253776896.0, - "err": 0.04873258247971535, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.03216002136468887, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.223580496651786, - "total_bits": 306729984.0, - "err": 0.02644464001059532, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.339651925223214, - "total_bits": 313545728.0, - "err": 0.025902310386300087, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.527151925223214, - "total_bits": 324555776.0, - "err": 0.023318501189351082, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.723580496651786, - "total_bits": 336090112.0, - "err": 0.022605642676353455, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 354161920.0, - "err": 0.01773657649755478, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 359683072.0, - "err": 0.01761176995933056, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.227770124162946, - "total_bits": 365696256.0, - "err": 0.016273686662316322, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 477123584.0, - "err": 0.012681451626121998, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.03216002136468887, - "qparams": { - "group_size": 128, + }, + { + "accuracy": 0.974926233291626, + "total_bits": 181067776, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, "bits": [ - 5 + 5, + 4 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.03216002136468887, - "qparams": { - "group_size": 128, + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, "bits": [ - 5 + 5, + 4 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.16.self_attn.q_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.11408249288797379, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.1044951006770134, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.09959302097558975, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.08890281617641449, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.05320742726325989, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.04902873560786247, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.06348218023777008, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.057953961193561554, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.0540272556245327, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.046104732900857925, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.043902624398469925, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.032277218997478485, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.027669966220855713, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.025573132559657097, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.025055905804038048, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.01616964489221573, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.013285673223435879, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.012880362570285797, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.011867338791489601, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.011532752774655819, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.008503188379108906, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.008458183147013187, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.007728704251348972, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.005677549168467522, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.032277218997478485, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.032277218997478485, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.16.self_attn.k_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.0901157334446907, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.08256468176841736, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.0767500251531601, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.0687340646982193, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.04164206236600876, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.03741396591067314, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.052746277302503586, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.048356425017118454, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.0423717126250267, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.03636355698108673, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.034933410584926605, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.026611056178808212, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.023020537570118904, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.020020969212055206, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.01925722323358059, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.01332923211157322, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.010353170335292816, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.009835650213062763, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.009291172958910465, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.008794024586677551, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.006930916104465723, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.006771096494048834, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.005912900902330875, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.004302533343434334, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.03636355698108673, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.03636355698108673, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 + "bits_prop": [ + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.16.self_attn.v_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.23001228272914886, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.2127782255411148, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.2062217891216278, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.18547028303146362, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.10825490951538086, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.10156949609518051, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.12459421157836914, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.11331479996442795, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.10969340801239014, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.09484528750181198, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.09041266143321991, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.06353078037500381, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.05416981503367424, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.05178692564368248, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.05121926590800285, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.031733188778162, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.02630515582859516, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.025791345164179802, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.023531978949904442, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.023162448778748512, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.016287673264741898, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.015584414824843407, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.015302883461117744, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.00953525211662054, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.031733188778162, - "qparams": { - "group_size": 128, + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, "bits": [ - 5 + 5, + 4 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.031733188778162, - "qparams": { - "group_size": 128, + { + "accuracy": 0.9834169745445251, + "total_bits": 219944960, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, "bits": [ + 6, 5 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.16.self_attn.o_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.23052990436553955, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.20676463842391968, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.1987254023551941, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.1753118634223938, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.10741071403026581, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.09900426119565964, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.12397616356611252, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.11299002170562744, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.10951153188943863, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.0906682088971138, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.08558036386966705, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.06375535577535629, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.05463555082678795, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.05206514522433281, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.05143772065639496, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.03222070261836052, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.02771874889731407, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.02713312953710556, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.02447556145489216, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.024064410477876663, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.01771438494324684, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.018230663612484932, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.01691283844411373, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.01349329762160778, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.03222070261836052, - "qparams": { - "group_size": 128, + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, "bits": [ + 6, 5 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.03222070261836052, - "qparams": { - "group_size": 128, + }, + "v_proj": { + "group_size": { + "6": 128 + }, "bits": [ - 5 + 6 ], "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.16.mlp.gate_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.14583414793014526, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.1366492062807083, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.1328667402267456, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.12086761742830276, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.06894804537296295, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.06536734104156494, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.07852491736412048, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.07208127528429031, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.06972794234752655, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.06165013089776039, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.059095267206430435, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.0401809886097908, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.034717489033937454, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.03331228718161583, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.03297300264239311, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.0201508067548275, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.017613187432289124, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.017335381358861923, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.01620081253349781, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.015994764864444733, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.010882181115448475, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.011410306207835674, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.010408516973257065, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.008341167122125626, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.034717489033937454, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.034717489033937454, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.16.mlp.up_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.22353529930114746, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.21045640110969543, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.20561246573925018, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.18697363138198853, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.1056983545422554, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.10080994665622711, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.11859709024429321, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.10916224867105484, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.10668490827083588, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.09467041492462158, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.09063423424959183, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.06046202406287193, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.052227068692445755, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.05065498873591423, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.050280071794986725, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.030210833996534348, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.026042750105261803, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.025684203952550888, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.023807324469089508, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.023569509387016296, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.015871087089180946, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.015722984448075294, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.015331832692027092, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.010353848338127136, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.030210833996534348, - "qparams": { - "group_size": 128, + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, "bits": [ + 6, 5 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.030210833996534348, - "qparams": { - "group_size": 128, + { + "accuracy": 0.9857495427131653, + "total_bits": 223010816, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, "bits": [ + 6, 5 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.16.mlp.down_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1789376395089284, - "total_bits": 127947775.99999999, - "err": 0.21684885025024414, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 139482112.0, - "err": 0.18997912108898163, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 154162176.0, - "err": 0.17808525264263153, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7235804966517856, - "total_bits": 159929344.0, - "err": 0.15727604925632477, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2235804966517856, - "total_bits": 189289472.0, - "err": 0.09895331412553787, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7146519252232144, - "total_bits": 218125312.0, - "err": 0.08764635026454926, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 178001152.0, - "err": 0.12106259167194366, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 183522304.0, - "err": 0.10985559970140457, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1789376395089284, - "total_bits": 186668032.0, - "err": 0.10238843411207199, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5271519252232144, - "total_bits": 207115264.0, - "err": 0.08281078934669495, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6608973911830356, - "total_bits": 214968832.0, - "err": 0.0789555013179779, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 236721408.0, - "err": 0.06186564266681671, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 242242560.0, - "err": 0.05285065248608589, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.223580496651786, - "total_bits": 248009728.0, - "err": 0.0478704459965229, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.321794782366071, - "total_bits": 253776896.0, - "err": 0.04661903902888298, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.03131573647260666, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.223580496651786, - "total_bits": 306729984.0, - "err": 0.025478767231106758, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.339651925223214, - "total_bits": 313545728.0, - "err": 0.02486809715628624, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.527151925223214, - "total_bits": 324555776.0, - "err": 0.02239772491157055, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.723580496651786, - "total_bits": 336090112.0, - "err": 0.021596567705273628, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 354161920.0, - "err": 0.017259059473872185, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 359683072.0, - "err": 0.017180239781737328, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.227770124162946, - "total_bits": 365696256.0, - "err": 0.015619037672877312, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 477123584.0, - "err": 0.012403969652950764, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.03131573647260666, - "qparams": { - "group_size": 128, + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, "bits": [ + 6, 5 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.03131573647260666, - "qparams": { - "group_size": 128, + }, + "v_proj": { + "group_size": { + "6": 32 + }, "bits": [ - 5 + 6 ], "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.17.self_attn.q_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.09719319641590118, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.08902612328529358, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.08379633724689484, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.0753072053194046, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.04517319053411484, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.04104794189333916, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.05591215938329697, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.05102122202515602, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.04598886892199516, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.03950902447104454, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.03803509473800659, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.02839518152177334, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.024396859109401703, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.021764332428574562, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.021105347201228142, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.014216025359928608, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.011385162360966206, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.010928583331406116, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.010266540572047234, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.009848020039498806, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.007462154142558575, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.00749414786696434, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.006529516074806452, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.005075700115412474, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.02839518152177334, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.02839518152177334, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.17.self_attn.k_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.08066076040267944, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.07348494976758957, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.06778941303491592, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.0609961561858654, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.03720603883266449, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.03295150771737099, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.048335377126932144, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.0439913384616375, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.03798450902104378, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.03254314512014389, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.03152354061603546, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.02445230633020401, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.021013086661696434, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.01791243813931942, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.017117008566856384, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.012205416336655617, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.00935449916869402, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.008820985443890095, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.008438384160399437, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.00792487058788538, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.006382480729371309, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.006307401228696108, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.005299045704305172, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.00412655808031559, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.03720603883266449, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.03720603883266449, - "qparams": { - "group_size": 32, + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, "bits": [ - 4, - 3 + 6, + 5 ], "bits_prop": [ 0.1, @@ -55810,47437 +2915,59525 @@ ], "scale_bits": 4 } - } - }, - { - "key": "model.layers.17.self_attn.v_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.18751879036426544, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.1694306582212448, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.16013428568840027, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.14425744116306305, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.08643660694360733, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.07827628403902054, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.10667339712381363, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.095990389585495, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.08855702728033066, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.07496681809425354, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.07208696752786636, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.05436358228325844, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.045796047896146774, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.04148655757308006, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.040401309728622437, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.02713271602988243, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.021284297108650208, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.020467838272452354, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.018959196284413338, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.01825045607984066, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.013946675695478916, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.013368450105190277, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.012156933546066284, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.008398723788559437, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.02713271602988243, - "qparams": { - "group_size": 128, + }, + { + "accuracy": 0.9863547682762146, + "total_bits": 252975104, + "q_proj": { + "group_size": { + "6": 128 + }, "bits": [ - 5 + 6 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.02713271602988243, - "qparams": { - "group_size": 128, + }, + "k_proj": { + "group_size": { + "6": 128 + }, "bits": [ - 5 + 6 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.17.self_attn.o_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.21806906163692474, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.19373054802417755, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.18377238512039185, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.15864311158657074, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.1018238514661789, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.09237790107727051, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.12146998941898346, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.11007588356733322, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.10426122695207596, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.08377886563539505, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.07946746051311493, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.0625852718949318, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.05318615585565567, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.049444831907749176, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.048548921942710876, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.03141726553440094, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.02638537995517254, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.025665737688541412, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.022936420515179634, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.02233685366809368, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.017021920531988144, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.017652863636612892, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.015831807628273964, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.013017669320106506, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.03141726553440094, - "qparams": { - "group_size": 128, + }, + "v_proj": { + "group_size": { + "6": 128 + }, "bits": [ - 5 + 6 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.03141726553440094, - "qparams": { - "group_size": 128, + }, + "o_proj": { + "group_size": { + "6": 128 + }, "bits": [ - 5 + 6 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 } - } - }, - { - "key": "model.layers.17.mlp.gate_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.147676020860672, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.13824643194675446, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.13453878462314606, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.12244905531406403, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.06957966089248657, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.06599506735801697, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.0791771188378334, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.07274307310581207, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.07039356231689453, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.062307462096214294, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.059767428785562515, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.04045696556568146, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.03499312698841095, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.03358035907149315, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.03324199840426445, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.020326515659689903, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.017709512263536453, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.017433572560548782, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.01629682630300522, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.016087833791971207, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.011015165597200394, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.011408522725105286, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.010541913099586964, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.008271528407931328, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.03499312698841095, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.03499312698841095, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 + }, + { + "accuracy": 0.9900280237197876, + "total_bits": 265314304, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.17.mlp.up_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.22856156527996063, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.21515586972236633, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.21027801930904388, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.1913147270679474, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.10794094949960709, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.10306539386510849, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.12129514664411545, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.11148713529109955, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.10899435728788376, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.09690415114164352, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.09293854236602783, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.06193447485566139, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.053434085100889206, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.05185882747173309, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.0514853410422802, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.031109096482396126, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.026911552995443344, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.02657017484307289, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.02470795437693596, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.024481330066919327, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.01667611300945282, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.01662333495914936, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.01615617424249649, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.011429891921579838, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.031109096482396126, - "qparams": { - "group_size": 128, + }, + "k_proj": { + "group_size": { + "6": 32 + }, "bits": [ - 5 + 6 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.031109096482396126, - "qparams": { - "group_size": 128, + }, + "v_proj": { + "group_size": { + "8": 32 + }, "bits": [ - 5 + 8 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.17.mlp.down_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1789376395089284, - "total_bits": 127947775.99999999, - "err": 0.22861884534358978, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 139482112.0, - "err": 0.1987270712852478, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 154162176.0, - "err": 0.1857922226190567, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7235804966517856, - "total_bits": 159929344.0, - "err": 0.1635395586490631, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2235804966517856, - "total_bits": 189289472.0, - "err": 0.1043255627155304, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7146519252232144, - "total_bits": 218125312.0, - "err": 0.09197928011417389, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 178001152.0, - "err": 0.12883539497852325, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 183522304.0, - "err": 0.11579111963510513, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1789376395089284, - "total_bits": 186668032.0, - "err": 0.10819299519062042, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5271519252232144, - "total_bits": 207115264.0, - "err": 0.08655355125665665, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6608973911830356, - "total_bits": 214968832.0, - "err": 0.08248947560787201, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 236721408.0, - "err": 0.06569888442754745, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 242242560.0, - "err": 0.05572359636425972, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.223580496651786, - "total_bits": 248009728.0, - "err": 0.05053333938121796, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.321794782366071, - "total_bits": 253776896.0, - "err": 0.04924418032169342, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.03320939093828201, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.223580496651786, - "total_bits": 306729984.0, - "err": 0.02701175957918167, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.339651925223214, - "total_bits": 313545728.0, - "err": 0.026395520195364952, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.527151925223214, - "total_bits": 324555776.0, - "err": 0.02364167757332325, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.723580496651786, - "total_bits": 336090112.0, - "err": 0.022812265902757645, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 354161920.0, - "err": 0.01836327277123928, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 359683072.0, - "err": 0.018333496525883675, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.227770124162946, - "total_bits": 365696256.0, - "err": 0.016659824177622795, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 477123584.0, - "err": 0.013402527198195457, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.03320939093828201, - "qparams": { - "group_size": 128, + }, + "o_proj": { + "group_size": { + "6": 32 + }, "bits": [ - 5 + 6 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.03320939093828201, - "qparams": { - "group_size": 128, + { + "accuracy": 0.9940544962882996, + "total_bits": 336861184, + "q_proj": { + "group_size": { + "8": 128 + }, "bits": [ - 5 + 8 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.18.self_attn.q_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.10961230099201202, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.10096506774425507, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.09561514109373093, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.08589242398738861, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.05118222162127495, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.046931806951761246, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.06293370574712753, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.05715493857860565, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.052044712007045746, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.04497621953487396, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.043228212743997574, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.032084640115499496, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.027462588623166084, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.024750966578722, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.024084245786070824, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.016129883006215096, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.013100868090987206, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.012640770524740219, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.011890493333339691, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.01147786807268858, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.00860824529081583, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.008756929077208042, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.007641905918717384, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.006208454258739948, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.032084640115499496, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.032084640115499496, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.18.self_attn.k_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.08908259868621826, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.08077686280012131, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.07374618202447891, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.0665275901556015, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.04097263887524605, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.03587948903441429, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.054035019129514694, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.04934355989098549, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.04191283881664276, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.03583475574851036, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.034850113093853, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.027334462851285934, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.02352229878306389, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.019732797518372536, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.018745828419923782, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.013667164370417595, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.01030207984149456, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.009652667678892612, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.009291697293519974, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.008660245686769485, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.007123690564185381, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.007013808470219374, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.005821650382131338, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.004559669177979231, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.03583475574851036, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.03583475574851036, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 + "bits_prop": [ + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.18.self_attn.v_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.20284241437911987, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.18381336331367493, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.17456947267055511, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.15790121257305145, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.09444983303546906, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.08590646088123322, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.11570693552494049, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.10367657989263535, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.09652965515851974, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.08205005526542664, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.0791197344660759, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.0591270849108696, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.049568016082048416, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.04532349854707718, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.04425593465566635, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.029571129009127617, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.023218290880322456, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.022411402314901352, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.020682021975517273, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.020005840808153152, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.01527650561183691, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.01443625707179308, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.013447972945868969, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.009067455306649208, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.029571129009127617, - "qparams": { - "group_size": 128, + }, + "v_proj": { + "group_size": { + "8": 128 + }, "bits": [ - 5 + 8 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.029571129009127617, - "qparams": { - "group_size": 128, + }, + "o_proj": { + "group_size": { + "8": 128 + }, "bits": [ - 5 + 8 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 } } - }, - { - "key": "model.layers.18.self_attn.o_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.19537383317947388, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.17431773245334625, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.16725459694862366, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.1420021802186966, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.09004351496696472, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.08303036540746689, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.10506200045347214, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.09531540423631668, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.09189039468765259, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.07433382421731949, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.06953312456607819, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.05406254529953003, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.04622052609920502, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.04386579617857933, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.04330521076917648, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.027407990768551826, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.023757033050060272, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.023268623277544975, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.020836463198065758, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.02049286849796772, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.015479274094104767, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.01617700420320034, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.014736676588654518, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.012483297847211361, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.027407990768551826, - "qparams": { - "group_size": 128, + ], + "model.layers.1.mlp": [ + { + "accuracy": 0.9703198671340942, + "total_bits": 394740800, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, "bits": [ - 5 + 3, + 2 ], "bits_prop": [ - 1.0 + 0.05, + 0.95 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.027407990768551826, - "qparams": { - "group_size": 128, + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9724442362785339, + "total_bits": 408503360, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9743745923042297, + "total_bits": 456551424, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9748512506484985, + "total_bits": 512257024, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9906695485115051, + "total_bits": 577676384, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9924629926681519, + "total_bits": 592151552, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9934874773025513, + "total_bits": 636733536, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9950789213180542, + "total_bits": 728020576, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9959225654602051, + "total_bits": 738755584, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9961481094360352, + "total_bits": 750822496, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9961869716644287, + "total_bits": 765297664, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9977733492851257, + "total_bits": 923968608, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9978715777397156, + "total_bits": 938443776, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.998347818851471, + "total_bits": 1068803168, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9985383749008179, + "total_bits": 1109664000, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.998670756816864, + "total_bits": 1209278720, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9988479018211365, + "total_bits": 1414799616, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.2.self_attn": [ + { + "accuracy": 0.9900375604629517, + "total_bits": 89141248, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9908539652824402, + "total_bits": 91697152, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9917138814926147, + "total_bits": 95234560, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9937248229980469, + "total_bits": 111748096, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9953514933586121, + "total_bits": 132388864, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9951545596122742, + "total_bits": 132455936, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9965099692344666, + "total_bits": 169089024, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9967316389083862, + "total_bits": 169221632, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9970675706863403, + "total_bits": 170671104, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9971886277198792, + "total_bits": 173039616, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9972314238548279, + "total_bits": 174398976, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9974043965339661, + "total_bits": 175225856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9978430271148682, + "total_bits": 178728960, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9980480074882507, + "total_bits": 181067776, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9988212585449219, + "total_bits": 219944960, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9989017248153687, + "total_bits": 223010816, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9990324974060059, + "total_bits": 252975104, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9992272853851318, + "total_bits": 265314304, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9994848966598511, + "total_bits": 336861184, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.2.mlp": [ + { + "accuracy": 0.984533429145813, + "total_bits": 394740800, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9849332571029663, + "total_bits": 408503360, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9876588582992554, + "total_bits": 456551424, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9885455369949341, + "total_bits": 512257024, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.992353081703186, + "total_bits": 577676384, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9929441213607788, + "total_bits": 592151552, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.994122326374054, + "total_bits": 636733536, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9960052371025085, + "total_bits": 728020576, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9964119791984558, + "total_bits": 738755584, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9961105585098267, + "total_bits": 750822496, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9966014623641968, + "total_bits": 765297664, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9980276823043823, + "total_bits": 923968608, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9981832504272461, + "total_bits": 938443776, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9988487958908081, + "total_bits": 1068803168, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9989181160926819, + "total_bits": 1109664000, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.999104380607605, + "total_bits": 1209278720, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9993896484375, + "total_bits": 1414799616, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.3.self_attn": [ + { + "accuracy": 0.9862762093544006, + "total_bits": 89141248, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9871528148651123, + "total_bits": 91697152, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.988358199596405, + "total_bits": 95234560, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9922603368759155, + "total_bits": 111748096, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.992903470993042, + "total_bits": 132388864, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9930241703987122, + "total_bits": 132455936, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9951269030570984, + "total_bits": 169089024, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9952595233917236, + "total_bits": 169221632, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9957238435745239, + "total_bits": 170671104, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9959831237792969, + "total_bits": 173039616, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9963065981864929, + "total_bits": 174398976, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9966176152229309, + "total_bits": 175225856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9967560172080994, + "total_bits": 178728960, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9970136284828186, + "total_bits": 181067776, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9981380105018616, + "total_bits": 219944960, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9983540773391724, + "total_bits": 223010816, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984897971153259, + "total_bits": 252975104, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9988787174224854, + "total_bits": 265314304, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9991750717163086, + "total_bits": 336861184, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.3.mlp": [ + { + "accuracy": 0.9783309698104858, + "total_bits": 394740800, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.978900134563446, + "total_bits": 408503360, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9824334979057312, + "total_bits": 456551424, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.983576774597168, + "total_bits": 512257024, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.989159882068634, + "total_bits": 577676384, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9900171756744385, + "total_bits": 592151552, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9915223121643066, + "total_bits": 636733536, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9943568110466003, + "total_bits": 728020576, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9949248433113098, + "total_bits": 738755584, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9944800734519958, + "total_bits": 750822496, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9951927661895752, + "total_bits": 765297664, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9972068667411804, + "total_bits": 923968608, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9974539279937744, + "total_bits": 938443776, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9983833432197571, + "total_bits": 1068803168, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984737038612366, + "total_bits": 1109664000, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987136721611023, + "total_bits": 1209278720, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9991561770439148, + "total_bits": 1414799616, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.4.self_attn": [ + { + "accuracy": 0.9853151440620422, + "total_bits": 89141248, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9860808849334717, + "total_bits": 91697152, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9878317713737488, + "total_bits": 95234560, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9913360476493835, + "total_bits": 111748096, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9925276041030884, + "total_bits": 132388864, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9924848675727844, + "total_bits": 132455936, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9947624206542969, + "total_bits": 169089024, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9947445392608643, + "total_bits": 169221632, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9952282905578613, + "total_bits": 170671104, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9955371022224426, + "total_bits": 173039616, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.996146023273468, + "total_bits": 174398976, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9963873028755188, + "total_bits": 175225856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9965980052947998, + "total_bits": 178728960, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9969950318336487, + "total_bits": 181067776, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9981296062469482, + "total_bits": 219944960, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9982879161834717, + "total_bits": 223010816, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984954595565796, + "total_bits": 252975104, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9988174438476562, + "total_bits": 265314304, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9991284608840942, + "total_bits": 336861184, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.4.mlp": [ + { + "accuracy": 0.9727437496185303, + "total_bits": 394740800, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9735307097434998, + "total_bits": 408503360, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.978121817111969, + "total_bits": 456551424, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9795686602592468, + "total_bits": 512257024, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9863850474357605, + "total_bits": 577676384, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9874873757362366, + "total_bits": 592151552, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9894081354141235, + "total_bits": 636733536, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9928821325302124, + "total_bits": 728020576, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.99359530210495, + "total_bits": 738755584, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9930700063705444, + "total_bits": 750822496, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.993964672088623, + "total_bits": 765297664, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9964849352836609, + "total_bits": 923968608, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9967941641807556, + "total_bits": 938443776, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9979469776153564, + "total_bits": 1068803168, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9980674982070923, + "total_bits": 1109664000, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9983720183372498, + "total_bits": 1209278720, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9989097118377686, + "total_bits": 1414799616, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.5.self_attn": [ + { + "accuracy": 0.9791058897972107, + "total_bits": 89141248, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9801602363586426, + "total_bits": 91697152, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9821765422821045, + "total_bits": 95234560, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9876219630241394, + "total_bits": 111748096, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9895280003547668, + "total_bits": 132388864, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9897540211677551, + "total_bits": 132455936, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9932929873466492, + "total_bits": 169089024, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9935899972915649, + "total_bits": 169221632, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9941936135292053, + "total_bits": 170671104, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9943014979362488, + "total_bits": 173039616, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9948320388793945, + "total_bits": 174398976, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9951465129852295, + "total_bits": 175225856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9955857396125793, + "total_bits": 178728960, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9957904815673828, + "total_bits": 181067776, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9974867105484009, + "total_bits": 219944960, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9975324869155884, + "total_bits": 223010816, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9981200695037842, + "total_bits": 252975104, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9982038140296936, + "total_bits": 265314304, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9989498853683472, + "total_bits": 336861184, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.5.mlp": [ + { + "accuracy": 0.9669961929321289, + "total_bits": 394740800, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.967944860458374, + "total_bits": 408503360, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9734864830970764, + "total_bits": 456551424, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9752030968666077, + "total_bits": 512257024, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9835737943649292, + "total_bits": 577676384, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9848706722259521, + "total_bits": 592151552, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9871665239334106, + "total_bits": 636733536, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9914288520812988, + "total_bits": 728020576, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9922659397125244, + "total_bits": 738755584, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.991642951965332, + "total_bits": 750822496, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.992716372013092, + "total_bits": 765297664, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9957721829414368, + "total_bits": 923968608, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.996171772480011, + "total_bits": 938443776, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9975630640983582, + "total_bits": 1068803168, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9977030754089355, + "total_bits": 1109664000, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9980728626251221, + "total_bits": 1209278720, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987555146217346, + "total_bits": 1414799616, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.6.self_attn": [ + { + "accuracy": 0.9771797060966492, + "total_bits": 89141248, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9785295724868774, + "total_bits": 91697152, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9806687235832214, + "total_bits": 95234560, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9865107536315918, + "total_bits": 111748096, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9888576865196228, + "total_bits": 132388864, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9889388084411621, + "total_bits": 132455936, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9927560091018677, + "total_bits": 169089024, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9928895831108093, + "total_bits": 169221632, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9935213327407837, + "total_bits": 170671104, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9938138723373413, + "total_bits": 173039616, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9942401051521301, + "total_bits": 174398976, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9945765733718872, + "total_bits": 175225856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9949877858161926, + "total_bits": 178728960, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9952473044395447, + "total_bits": 181067776, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9972100257873535, + "total_bits": 219944960, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9972156286239624, + "total_bits": 223010816, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.99783855676651, + "total_bits": 252975104, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9978390336036682, + "total_bits": 265314304, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9985849261283875, + "total_bits": 336861184, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.6.mlp": [ + { + "accuracy": 0.9630817770957947, + "total_bits": 394740800, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.964099645614624, + "total_bits": 408503360, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.970582902431488, + "total_bits": 456551424, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9725767374038696, + "total_bits": 512257024, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9815937280654907, + "total_bits": 577676384, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9830909371376038, + "total_bits": 592151552, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9857439398765564, + "total_bits": 636733536, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9903808236122131, + "total_bits": 728020576, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9913470149040222, + "total_bits": 738755584, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9906216263771057, + "total_bits": 750822496, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.991847813129425, + "total_bits": 765297664, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9952542185783386, + "total_bits": 923968608, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9956900477409363, + "total_bits": 938443776, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9972473978996277, + "total_bits": 1068803168, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9974108934402466, + "total_bits": 1109664000, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.99784255027771, + "total_bits": 1209278720, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9985712766647339, + "total_bits": 1414799616, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.7.self_attn": [ + { + "accuracy": 0.9736393094062805, + "total_bits": 89141248, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9751064777374268, + "total_bits": 91697152, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.97762531042099, + "total_bits": 95234560, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9842953085899353, + "total_bits": 111748096, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.986538290977478, + "total_bits": 132388864, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.986786425113678, + "total_bits": 132455936, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9909218549728394, + "total_bits": 169089024, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9912269711494446, + "total_bits": 169221632, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9920658469200134, + "total_bits": 170671104, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9926653504371643, + "total_bits": 173039616, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9933061003684998, + "total_bits": 174398976, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9938615560531616, + "total_bits": 175225856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9942512512207031, + "total_bits": 178728960, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.994676411151886, + "total_bits": 181067776, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9967705607414246, + "total_bits": 219944960, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.996920645236969, + "total_bits": 223010816, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9975546598434448, + "total_bits": 252975104, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9977102279663086, + "total_bits": 265314304, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9985471367835999, + "total_bits": 336861184, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.7.mlp": [ + { + "accuracy": 0.9589145183563232, + "total_bits": 394740800, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9600588083267212, + "total_bits": 408503360, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9669245481491089, + "total_bits": 456551424, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9690402746200562, + "total_bits": 512257024, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9795185923576355, + "total_bits": 577676384, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9811975359916687, + "total_bits": 592151552, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9839912056922913, + "total_bits": 636733536, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9893244504928589, + "total_bits": 728020576, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9903857111930847, + "total_bits": 738755584, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9895642995834351, + "total_bits": 750822496, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9909353852272034, + "total_bits": 765297664, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9947165250778198, + "total_bits": 923968608, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9951986074447632, + "total_bits": 938443776, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.996930718421936, + "total_bits": 1068803168, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9971113204956055, + "total_bits": 1109664000, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9975644946098328, + "total_bits": 1209278720, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9983987212181091, + "total_bits": 1414799616, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.8.self_attn": [ + { + "accuracy": 0.9715123772621155, + "total_bits": 89141248, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9729994535446167, + "total_bits": 91697152, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9756790399551392, + "total_bits": 95234560, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9833037853240967, + "total_bits": 111748096, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9858036041259766, + "total_bits": 132388864, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9861367344856262, + "total_bits": 132455936, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9907304048538208, + "total_bits": 169089024, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9911977648735046, + "total_bits": 169221632, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9919226765632629, + "total_bits": 170671104, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9922053813934326, + "total_bits": 173039616, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9928337335586548, + "total_bits": 174398976, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9932734370231628, + "total_bits": 175225856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9936707615852356, + "total_bits": 178728960, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9941463470458984, + "total_bits": 181067776, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9964893460273743, + "total_bits": 219944960, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9966834187507629, + "total_bits": 223010816, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.997372031211853, + "total_bits": 252975104, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9976113438606262, + "total_bits": 265314304, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9983444213867188, + "total_bits": 336861184, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.8.mlp": [ + { + "accuracy": 0.9554929733276367, + "total_bits": 394740800, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9567272663116455, + "total_bits": 408503360, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9642322659492493, + "total_bits": 456551424, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.966547966003418, + "total_bits": 512257024, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9778204560279846, + "total_bits": 577676384, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.979622483253479, + "total_bits": 592151552, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9826808571815491, + "total_bits": 636733536, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9884400367736816, + "total_bits": 728020576, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9895856976509094, + "total_bits": 738755584, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9887033104896545, + "total_bits": 750822496, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9901760220527649, + "total_bits": 765297664, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9942710399627686, + "total_bits": 923968608, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9947928786277771, + "total_bits": 938443776, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9966679215431213, + "total_bits": 1068803168, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9968520998954773, + "total_bits": 1109664000, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9973439574241638, + "total_bits": 1209278720, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9982328414916992, + "total_bits": 1414799616, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.9.self_attn": [ + { + "accuracy": 0.9693062901496887, + "total_bits": 89141248, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.970729410648346, + "total_bits": 91697152, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.973220705986023, + "total_bits": 95234560, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9806281328201294, + "total_bits": 111748096, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9850863814353943, + "total_bits": 132388864, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9853408336639404, + "total_bits": 132455936, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9905824065208435, + "total_bits": 169089024, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9909061789512634, + "total_bits": 169221632, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9915569424629211, + "total_bits": 170671104, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9916732311248779, + "total_bits": 173039616, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9925299286842346, + "total_bits": 174398976, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9929314255714417, + "total_bits": 175225856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9933764338493347, + "total_bits": 178728960, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9938421249389648, + "total_bits": 181067776, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9964480400085449, + "total_bits": 219944960, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9965616464614868, + "total_bits": 223010816, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9975306987762451, + "total_bits": 252975104, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9975209832191467, + "total_bits": 265314304, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.998803436756134, + "total_bits": 336861184, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.9.mlp": [ + { + "accuracy": 0.9524677991867065, + "total_bits": 394740800, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9538310170173645, + "total_bits": 408503360, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9619337916374207, + "total_bits": 456551424, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9644193649291992, + "total_bits": 512257024, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9763040542602539, + "total_bits": 577676384, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9782448410987854, + "total_bits": 592151552, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9815577268600464, + "total_bits": 636733536, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9876535534858704, + "total_bits": 728020576, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9888485074043274, + "total_bits": 738755584, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9879247546195984, + "total_bits": 750822496, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9895080924034119, + "total_bits": 765297664, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9938750863075256, + "total_bits": 923968608, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9944368600845337, + "total_bits": 938443776, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9964278340339661, + "total_bits": 1068803168, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.996644914150238, + "total_bits": 1109664000, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9971753358840942, + "total_bits": 1209278720, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9981309175491333, + "total_bits": 1414799616, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.10.self_attn": [ + { + "accuracy": 0.9638569355010986, + "total_bits": 89141248, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.965466320514679, + "total_bits": 91697152, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9689413905143738, + "total_bits": 95234560, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9782928824424744, + "total_bits": 111748096, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9820681810379028, + "total_bits": 132388864, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9823836088180542, + "total_bits": 132455936, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9888140559196472, + "total_bits": 169089024, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9892522096633911, + "total_bits": 169221632, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9902700781822205, + "total_bits": 170671104, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9908148646354675, + "total_bits": 173039616, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9910318851470947, + "total_bits": 174398976, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9916226863861084, + "total_bits": 175225856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9921797513961792, + "total_bits": 178728960, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.992790937423706, + "total_bits": 181067776, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9956189393997192, + "total_bits": 219944960, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.995980441570282, + "total_bits": 223010816, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9968297481536865, + "total_bits": 252975104, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.997138261795044, + "total_bits": 265314304, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9979228973388672, + "total_bits": 336861184, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.10.mlp": [ + { + "accuracy": 0.9504137635231018, + "total_bits": 394740800, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9518914818763733, + "total_bits": 408503360, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9607448577880859, + "total_bits": 456551424, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9634970426559448, + "total_bits": 512257024, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.975249171257019, + "total_bits": 577676384, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9773033261299133, + "total_bits": 592151552, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.980927050113678, + "total_bits": 636733536, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9869520664215088, + "total_bits": 728020576, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9883050918579102, + "total_bits": 738755584, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9873741269111633, + "total_bits": 750822496, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9890349507331848, + "total_bits": 765297664, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9935863018035889, + "total_bits": 923968608, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9941607117652893, + "total_bits": 938443776, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.99623042345047, + "total_bits": 1068803168, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9964818358421326, + "total_bits": 1109664000, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9970704317092896, + "total_bits": 1209278720, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9980186820030212, + "total_bits": 1414799616, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.11.self_attn": [ + { + "accuracy": 0.9595574736595154, + "total_bits": 89141248, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9612486362457275, + "total_bits": 91697152, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9645552039146423, + "total_bits": 95234560, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.974942147731781, + "total_bits": 111748096, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9796185493469238, + "total_bits": 132388864, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9801087379455566, + "total_bits": 132455936, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9865255951881409, + "total_bits": 169089024, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9871801137924194, + "total_bits": 169221632, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9882009625434875, + "total_bits": 170671104, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9891430735588074, + "total_bits": 173039616, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9898891448974609, + "total_bits": 174398976, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9905411005020142, + "total_bits": 175225856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9911357164382935, + "total_bits": 178728960, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9918347597122192, + "total_bits": 181067776, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9949962496757507, + "total_bits": 219944960, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9953417778015137, + "total_bits": 223010816, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.996269941329956, + "total_bits": 252975104, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9965945482254028, + "total_bits": 265314304, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9982367753982544, + "total_bits": 336861184, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.11.mlp": [ + { + "accuracy": 0.9490307569503784, + "total_bits": 394740800, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9505550265312195, + "total_bits": 408503360, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9597024321556091, + "total_bits": 456551424, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9625310897827148, + "total_bits": 512257024, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.974552571773529, + "total_bits": 577676384, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9766661524772644, + "total_bits": 592151552, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9804140329360962, + "total_bits": 636733536, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9865413904190063, + "total_bits": 728020576, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9879382252693176, + "total_bits": 738755584, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9870026707649231, + "total_bits": 750822496, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9887055158615112, + "total_bits": 765297664, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9933755397796631, + "total_bits": 923968608, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9939532279968262, + "total_bits": 938443776, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9960675239562988, + "total_bits": 1068803168, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9963611364364624, + "total_bits": 1109664000, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9969674348831177, + "total_bits": 1209278720, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9979198575019836, + "total_bits": 1414799616, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.12.self_attn": [ + { + "accuracy": 0.9557563066482544, + "total_bits": 89141248, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9577322006225586, + "total_bits": 91697152, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9616643190383911, + "total_bits": 95234560, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.971433162689209, + "total_bits": 111748096, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9779186844825745, + "total_bits": 132388864, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9780861139297485, + "total_bits": 132455936, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9856311678886414, + "total_bits": 169089024, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9858606457710266, + "total_bits": 169221632, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9871066808700562, + "total_bits": 170671104, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9878600835800171, + "total_bits": 173039616, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9885560870170593, + "total_bits": 174398976, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9894586205482483, + "total_bits": 175225856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9899231195449829, + "total_bits": 178728960, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9907426834106445, + "total_bits": 181067776, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9943788051605225, + "total_bits": 219944960, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9948565363883972, + "total_bits": 223010816, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9957751035690308, + "total_bits": 252975104, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9965121150016785, + "total_bits": 265314304, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9979587197303772, + "total_bits": 336861184, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.12.mlp": [ + { + "accuracy": 0.9450268149375916, + "total_bits": 394740800, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9467305541038513, + "total_bits": 408503360, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9561208486557007, + "total_bits": 456551424, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9590578675270081, + "total_bits": 512257024, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9725443720817566, + "total_bits": 577676384, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9748436808586121, + "total_bits": 592151552, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9787108302116394, + "total_bits": 636733536, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9855126738548279, + "total_bits": 728020576, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.986987829208374, + "total_bits": 738755584, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9859724640846252, + "total_bits": 750822496, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9878173470497131, + "total_bits": 765297664, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9928443431854248, + "total_bits": 923968608, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9934665560722351, + "total_bits": 938443776, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9957410097122192, + "total_bits": 1068803168, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9960489273071289, + "total_bits": 1109664000, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9966649413108826, + "total_bits": 1209278720, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.997717559337616, + "total_bits": 1414799616, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.13.self_attn": [ + { + "accuracy": 0.9552391767501831, + "total_bits": 89141248, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9572742581367493, + "total_bits": 91697152, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9615687131881714, + "total_bits": 95234560, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9732059240341187, + "total_bits": 111748096, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9778205156326294, + "total_bits": 132388864, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.978171169757843, + "total_bits": 132455936, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9864978790283203, + "total_bits": 169089024, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.986967921257019, + "total_bits": 169221632, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9880002737045288, + "total_bits": 170671104, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9887223839759827, + "total_bits": 173039616, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.98896723985672, + "total_bits": 174398976, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9896665811538696, + "total_bits": 175225856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9904374480247498, + "total_bits": 178728960, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.991020143032074, + "total_bits": 181067776, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.994779109954834, + "total_bits": 219944960, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9951836466789246, + "total_bits": 223010816, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9964016675949097, + "total_bits": 252975104, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9967215657234192, + "total_bits": 265314304, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9982174038887024, + "total_bits": 336861184, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.13.mlp": [ + { + "accuracy": 0.9405258893966675, + "total_bits": 394740800, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9423239827156067, + "total_bits": 408503360, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9526388645172119, + "total_bits": 456551424, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9559372067451477, + "total_bits": 512257024, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9701609015464783, + "total_bits": 577676384, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9726821184158325, + "total_bits": 592151552, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9769512414932251, + "total_bits": 636733536, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9841681718826294, + "total_bits": 728020576, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9858324527740479, + "total_bits": 738755584, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9847643375396729, + "total_bits": 750822496, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9867731928825378, + "total_bits": 765297664, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9922424554824829, + "total_bits": 923968608, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9929128289222717, + "total_bits": 938443776, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9953896403312683, + "total_bits": 1068803168, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9957048296928406, + "total_bits": 1109664000, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9963961839675903, + "total_bits": 1209278720, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9975093603134155, + "total_bits": 1414799616, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.14.self_attn": [ + { + "accuracy": 0.9470834136009216, + "total_bits": 89141248, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.949529767036438, + "total_bits": 91697152, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9543803334236145, + "total_bits": 95234560, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9678031206130981, + "total_bits": 111748096, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9734838008880615, + "total_bits": 132388864, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9739448428153992, + "total_bits": 132455936, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9838231801986694, + "total_bits": 169089024, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9844220280647278, + "total_bits": 169221632, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.985498309135437, + "total_bits": 170671104, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9863905310630798, + "total_bits": 173039616, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9866253137588501, + "total_bits": 174398976, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9875165820121765, + "total_bits": 175225856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9884134531021118, + "total_bits": 178728960, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9893634915351868, + "total_bits": 181067776, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9934873580932617, + "total_bits": 219944960, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.994227945804596, + "total_bits": 223010816, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9953527450561523, + "total_bits": 252975104, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9961115121841431, + "total_bits": 265314304, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9978159070014954, + "total_bits": 336861184, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.14.mlp": [ + { + "accuracy": 0.9371218085289001, + "total_bits": 394740800, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9389986991882324, + "total_bits": 408503360, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.949920117855072, + "total_bits": 456551424, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9534218311309814, + "total_bits": 512257024, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9684104323387146, + "total_bits": 577676384, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9711309671401978, + "total_bits": 592151552, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.975614070892334, + "total_bits": 636733536, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9830725193023682, + "total_bits": 728020576, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9848687648773193, + "total_bits": 738755584, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9838681221008301, + "total_bits": 750822496, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9859519600868225, + "total_bits": 765297664, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9917041659355164, + "total_bits": 923968608, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9923370480537415, + "total_bits": 938443776, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9949150085449219, + "total_bits": 1068803168, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9953028559684753, + "total_bits": 1109664000, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9959975481033325, + "total_bits": 1209278720, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9970805048942566, + "total_bits": 1414799616, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.15.self_attn": [ + { + "accuracy": 0.9456266164779663, + "total_bits": 89141248, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9480615854263306, + "total_bits": 91697152, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9533383250236511, + "total_bits": 95234560, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.965390682220459, + "total_bits": 111748096, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9727036356925964, + "total_bits": 132388864, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9732003808021545, + "total_bits": 132455936, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9828101396560669, + "total_bits": 169089024, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9833673238754272, + "total_bits": 169221632, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9846935868263245, + "total_bits": 170671104, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9853549599647522, + "total_bits": 173039616, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9862855076789856, + "total_bits": 174398976, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9871431589126587, + "total_bits": 175225856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9880532622337341, + "total_bits": 178728960, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9889615178108215, + "total_bits": 181067776, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9934453368186951, + "total_bits": 219944960, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.993971049785614, + "total_bits": 223010816, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9952077269554138, + "total_bits": 252975104, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9958668351173401, + "total_bits": 265314304, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9978691935539246, + "total_bits": 336861184, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.15.mlp": [ + { + "accuracy": 0.9332342743873596, + "total_bits": 394740800, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9351798892021179, + "total_bits": 408503360, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9465667605400085, + "total_bits": 456551424, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9502391219139099, + "total_bits": 512257024, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.966607391834259, + "total_bits": 577676384, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9693454504013062, + "total_bits": 592151552, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9740636944770813, + "total_bits": 636733536, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9823034405708313, + "total_bits": 728020576, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.984076201915741, + "total_bits": 738755584, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9829569458961487, + "total_bits": 750822496, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9851566553115845, + "total_bits": 765297664, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9913040995597839, + "total_bits": 923968608, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9920278191566467, + "total_bits": 938443776, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.994810163974762, + "total_bits": 1068803168, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9951891303062439, + "total_bits": 1109664000, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9959383606910706, + "total_bits": 1209278720, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9971897006034851, + "total_bits": 1414799616, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.16.self_attn": [ + { + "accuracy": 0.9488327503204346, + "total_bits": 89141248, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.950686514377594, + "total_bits": 91697152, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9558826088905334, + "total_bits": 95234560, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9680045247077942, + "total_bits": 111748096, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9742181897163391, + "total_bits": 132388864, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9747812747955322, + "total_bits": 132455936, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9844616055488586, + "total_bits": 169089024, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9851713180541992, + "total_bits": 169221632, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9861835837364197, + "total_bits": 170671104, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9867813587188721, + "total_bits": 173039616, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9871595501899719, + "total_bits": 174398976, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9879596829414368, + "total_bits": 175225856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9889702796936035, + "total_bits": 178728960, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9897680282592773, + "total_bits": 181067776, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9939336776733398, + "total_bits": 219944960, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9945061206817627, + "total_bits": 223010816, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9957642555236816, + "total_bits": 252975104, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9963384866714478, + "total_bits": 265314304, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.997974157333374, + "total_bits": 336861184, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.16.mlp": [ + { + "accuracy": 0.9272598624229431, + "total_bits": 394740800, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9294061660766602, + "total_bits": 408503360, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9417588710784912, + "total_bits": 456551424, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9458762407302856, + "total_bits": 512257024, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.963624119758606, + "total_bits": 577676384, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.966629147529602, + "total_bits": 592151552, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9717462062835693, + "total_bits": 636733536, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.980557918548584, + "total_bits": 728020576, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9824984073638916, + "total_bits": 738755584, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9814392924308777, + "total_bits": 750822496, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9838128089904785, + "total_bits": 765297664, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9904978275299072, + "total_bits": 923968608, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9912627935409546, + "total_bits": 938443776, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9942458868026733, + "total_bits": 1068803168, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9947471022605896, + "total_bits": 1109664000, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9955606460571289, + "total_bits": 1209278720, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9968914985656738, + "total_bits": 1414799616, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.17.self_attn": [ + { + "accuracy": 0.9452739357948303, + "total_bits": 89141248, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9479079842567444, + "total_bits": 91697152, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9530227780342102, + "total_bits": 95234560, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.967292308807373, + "total_bits": 111748096, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9724723696708679, + "total_bits": 132388864, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9730525612831116, + "total_bits": 132455936, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9823420643806458, + "total_bits": 169089024, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9831523895263672, + "total_bits": 169221632, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9845689535140991, + "total_bits": 170671104, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9855678081512451, + "total_bits": 173039616, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.986076831817627, + "total_bits": 174398976, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9870094060897827, + "total_bits": 175225856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9877439141273499, + "total_bits": 178728960, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9889193773269653, + "total_bits": 181067776, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9929871559143066, + "total_bits": 219944960, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9939438104629517, + "total_bits": 223010816, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9947290420532227, + "total_bits": 252975104, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.995849609375, + "total_bits": 265314304, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9970279335975647, + "total_bits": 336861184, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.17.mlp": [ + { + "accuracy": 0.9223220944404602, + "total_bits": 394740800, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9246417284011841, + "total_bits": 408503360, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9379340410232544, + "total_bits": 456551424, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9426669478416443, + "total_bits": 512257024, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9609549045562744, + "total_bits": 577676384, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.964194655418396, + "total_bits": 592151552, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9697945713996887, + "total_bits": 636733536, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9789257645606995, + "total_bits": 728020576, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.98108971118927, + "total_bits": 738755584, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9800138473510742, + "total_bits": 750822496, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9825708866119385, + "total_bits": 765297664, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9897059798240662, + "total_bits": 923968608, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9904915690422058, + "total_bits": 938443776, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9936568737030029, + "total_bits": 1068803168, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9941855669021606, + "total_bits": 1109664000, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9950640797615051, + "total_bits": 1209278720, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9963690638542175, + "total_bits": 1414799616, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.18.self_attn": [ + { + "accuracy": 0.94408118724823, + "total_bits": 89141248, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9466457366943359, + "total_bits": 91697152, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9516456127166748, + "total_bits": 95234560, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9640782475471497, + "total_bits": 111748096, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9720502495765686, + "total_bits": 132388864, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9726921916007996, + "total_bits": 132455936, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.981819748878479, + "total_bits": 169089024, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9827513694763184, + "total_bits": 169221632, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9840433597564697, + "total_bits": 170671104, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9850029945373535, + "total_bits": 173039616, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9858003258705139, + "total_bits": 174398976, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9867492914199829, + "total_bits": 175225856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.987493634223938, + "total_bits": 178728960, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9883816242218018, + "total_bits": 181067776, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.992755115032196, + "total_bits": 219944960, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9935425519943237, + "total_bits": 223010816, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9943405389785767, + "total_bits": 252975104, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.995483934879303, + "total_bits": 265314304, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9972608089447021, + "total_bits": 336861184, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.18.mlp": [ + { + "accuracy": 0.9186995029449463, + "total_bits": 394740800, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9211822152137756, + "total_bits": 408503360, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9351019859313965, + "total_bits": 456551424, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9402898550033569, + "total_bits": 512257024, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9591230154037476, + "total_bits": 577676384, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9625265598297119, + "total_bits": 592151552, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.968386173248291, + "total_bits": 636733536, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9776533842086792, + "total_bits": 728020576, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9800076484680176, + "total_bits": 738755584, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9790563583374023, + "total_bits": 750822496, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9817141890525818, + "total_bits": 765297664, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9891507625579834, + "total_bits": 923968608, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9899314045906067, + "total_bits": 938443776, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9931818246841431, + "total_bits": 1068803168, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9937616586685181, + "total_bits": 1109664000, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9946668148040771, + "total_bits": 1209278720, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9959447383880615, + "total_bits": 1414799616, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.19.self_attn": [ + { + "accuracy": 0.9490517973899841, + "total_bits": 89141248, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.951048731803894, + "total_bits": 91697152, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9559349417686462, + "total_bits": 95234560, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9680829048156738, + "total_bits": 111748096, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9739089012145996, + "total_bits": 132388864, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9746545553207397, + "total_bits": 132455936, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9833163022994995, + "total_bits": 169089024, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9842910170555115, + "total_bits": 169221632, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9855719804763794, + "total_bits": 170671104, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9861876368522644, + "total_bits": 173039616, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9871087074279785, + "total_bits": 174398976, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9879531860351562, + "total_bits": 175225856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.988820493221283, + "total_bits": 178728960, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9895066618919373, + "total_bits": 181067776, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9934722781181335, + "total_bits": 219944960, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9939818978309631, + "total_bits": 223010816, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9950556755065918, + "total_bits": 252975104, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9956225156784058, + "total_bits": 265314304, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9973382949829102, + "total_bits": 336861184, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.19.mlp": [ + { + "accuracy": 0.9183827638626099, + "total_bits": 394740800, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9208594560623169, + "total_bits": 408503360, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9347587823867798, + "total_bits": 456551424, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.939883291721344, + "total_bits": 512257024, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9590166807174683, + "total_bits": 577676384, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9624627828598022, + "total_bits": 592151552, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9683180451393127, + "total_bits": 636733536, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9776405692100525, + "total_bits": 728020576, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9799777865409851, + "total_bits": 738755584, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9790009260177612, + "total_bits": 750822496, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9816671013832092, + "total_bits": 765297664, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9890996813774109, + "total_bits": 923968608, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9899163246154785, + "total_bits": 938443776, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9931293725967407, + "total_bits": 1068803168, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9937893748283386, + "total_bits": 1109664000, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9946932196617126, + "total_bits": 1209278720, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9959895014762878, + "total_bits": 1414799616, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.20.self_attn": [ + { + "accuracy": 0.9514031410217285, + "total_bits": 89141248, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9536084532737732, + "total_bits": 91697152, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9574352502822876, + "total_bits": 95234560, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9697067737579346, + "total_bits": 111748096, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9757400155067444, + "total_bits": 132388864, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9759773015975952, + "total_bits": 132455936, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9843240976333618, + "total_bits": 169089024, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9845936894416809, + "total_bits": 169221632, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9861356616020203, + "total_bits": 170671104, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.986490786075592, + "total_bits": 173039616, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9875079393386841, + "total_bits": 174398976, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9883168339729309, + "total_bits": 175225856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9887577891349792, + "total_bits": 178728960, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9897370338439941, + "total_bits": 181067776, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9939215183258057, + "total_bits": 219944960, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.994412362575531, + "total_bits": 223010816, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.995498776435852, + "total_bits": 252975104, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9962353706359863, + "total_bits": 265314304, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9978194832801819, + "total_bits": 336861184, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.20.mlp": [ + { + "accuracy": 0.9195817112922668, + "total_bits": 394740800, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9218739867210388, + "total_bits": 408503360, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9351979494094849, + "total_bits": 456551424, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9398000836372375, + "total_bits": 512257024, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9597527384757996, + "total_bits": 577676384, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9629899263381958, + "total_bits": 592151552, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9686428904533386, + "total_bits": 636733536, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9785364270210266, + "total_bits": 728020576, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9806663393974304, + "total_bits": 738755584, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9794799089431763, + "total_bits": 750822496, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9820805191993713, + "total_bits": 765297664, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9895047545433044, + "total_bits": 923968608, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9903888702392578, + "total_bits": 938443776, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9936806559562683, + "total_bits": 1068803168, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9941885471343994, + "total_bits": 1109664000, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9950851202011108, + "total_bits": 1209278720, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9965639710426331, + "total_bits": 1414799616, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.21.self_attn": [ + { + "accuracy": 0.9564099311828613, + "total_bits": 89141248, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9582138061523438, + "total_bits": 91697152, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9619127511978149, + "total_bits": 95234560, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9727891087532043, + "total_bits": 111748096, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9781115651130676, + "total_bits": 132388864, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9783715605735779, + "total_bits": 132455936, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9857781529426575, + "total_bits": 169089024, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9860761165618896, + "total_bits": 169221632, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9870508909225464, + "total_bits": 170671104, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9876623153686523, + "total_bits": 173039616, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9888306856155396, + "total_bits": 174398976, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9895981550216675, + "total_bits": 175225856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9901157021522522, + "total_bits": 178728960, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.99090975522995, + "total_bits": 181067776, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9944233894348145, + "total_bits": 219944960, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9950221180915833, + "total_bits": 223010816, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9957414269447327, + "total_bits": 252975104, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9966710805892944, + "total_bits": 265314304, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9977868795394897, + "total_bits": 336861184, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.21.mlp": [ + { + "accuracy": 0.9204163551330566, + "total_bits": 394740800, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9225960969924927, + "total_bits": 408503360, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9355134963989258, + "total_bits": 456551424, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9397560954093933, + "total_bits": 512257024, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9601695537567139, + "total_bits": 577676384, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9633379578590393, + "total_bits": 592151552, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9688054919242859, + "total_bits": 636733536, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9790127873420715, + "total_bits": 728020576, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9810447692871094, + "total_bits": 738755584, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.979697048664093, + "total_bits": 750822496, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.982274055480957, + "total_bits": 765297664, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9896356463432312, + "total_bits": 923968608, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9905593991279602, + "total_bits": 938443776, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9938428401947021, + "total_bits": 1068803168, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9942983388900757, + "total_bits": 1109664000, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9951584339141846, + "total_bits": 1209278720, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9967150092124939, + "total_bits": 1414799616, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.22.self_attn": [ + { + "accuracy": 0.9637552499771118, + "total_bits": 89141248, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9659462571144104, + "total_bits": 91697152, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.969290018081665, + "total_bits": 95234560, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9771500825881958, + "total_bits": 111748096, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9816005229949951, + "total_bits": 132388864, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9819766879081726, + "total_bits": 132455936, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.987238347530365, + "total_bits": 169089024, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9876794219017029, + "total_bits": 169221632, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9888818264007568, + "total_bits": 170671104, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9892641305923462, + "total_bits": 173039616, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9906289577484131, + "total_bits": 174398976, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9912598133087158, + "total_bits": 175225856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9916530847549438, + "total_bits": 178728960, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9922248125076294, + "total_bits": 181067776, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9952769875526428, + "total_bits": 219944960, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9957273602485657, + "total_bits": 223010816, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9962239265441895, + "total_bits": 252975104, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9970911145210266, + "total_bits": 265314304, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9983139634132385, + "total_bits": 336861184, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.22.mlp": [ + { + "accuracy": 0.9216684103012085, + "total_bits": 394740800, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9238190650939941, + "total_bits": 408503360, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9360491037368774, + "total_bits": 456551424, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9399940371513367, + "total_bits": 512257024, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.960805356502533, + "total_bits": 577676384, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9638703465461731, + "total_bits": 592151552, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9690302014350891, + "total_bits": 636733536, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.979417085647583, + "total_bits": 728020576, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9814004898071289, + "total_bits": 738755584, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.980050265789032, + "total_bits": 750822496, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9825678467750549, + "total_bits": 765297664, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9898810386657715, + "total_bits": 923968608, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9907958507537842, + "total_bits": 938443776, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9941083788871765, + "total_bits": 1068803168, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9944785237312317, + "total_bits": 1109664000, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9953035116195679, + "total_bits": 1209278720, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9969333410263062, + "total_bits": 1414799616, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.23.self_attn": [ + { + "accuracy": 0.961937427520752, + "total_bits": 89141248, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9638397097587585, + "total_bits": 91697152, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9677366614341736, + "total_bits": 95234560, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9762910008430481, + "total_bits": 111748096, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9806434512138367, + "total_bits": 132388864, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9813516139984131, + "total_bits": 132455936, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.986984372138977, + "total_bits": 169089024, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9878765940666199, + "total_bits": 169221632, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9887995719909668, + "total_bits": 170671104, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.989104151725769, + "total_bits": 173039616, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9905569553375244, + "total_bits": 174398976, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9911107420921326, + "total_bits": 175225856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9916852712631226, + "total_bits": 178728960, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9922620058059692, + "total_bits": 181067776, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9952684044837952, + "total_bits": 219944960, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9957107305526733, + "total_bits": 223010816, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9963122010231018, + "total_bits": 252975104, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9969786405563354, + "total_bits": 265314304, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9981492757797241, + "total_bits": 336861184, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.23.mlp": [ + { + "accuracy": 0.922935426235199, + "total_bits": 394740800, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9250244498252869, + "total_bits": 408503360, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9367407560348511, + "total_bits": 456551424, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9405222535133362, + "total_bits": 512257024, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9613974094390869, + "total_bits": 577676384, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9644268751144409, + "total_bits": 592151552, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9693678617477417, + "total_bits": 636733536, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.979735255241394, + "total_bits": 728020576, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9816800951957703, + "total_bits": 738755584, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9803524017333984, + "total_bits": 750822496, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9828374981880188, + "total_bits": 765297664, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9900204539299011, + "total_bits": 923968608, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9909392595291138, + "total_bits": 938443776, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9941734075546265, + "total_bits": 1068803168, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9945809245109558, + "total_bits": 1109664000, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9953716397285461, + "total_bits": 1209278720, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.997025191783905, + "total_bits": 1414799616, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.24.self_attn": [ + { + "accuracy": 0.9636750817298889, + "total_bits": 89141248, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.965139627456665, + "total_bits": 91697152, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9688884019851685, + "total_bits": 95234560, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9769046902656555, + "total_bits": 111748096, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9810408353805542, + "total_bits": 132388864, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9816277623176575, + "total_bits": 132455936, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9875279068946838, + "total_bits": 169089024, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9882361888885498, + "total_bits": 169221632, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9892072677612305, + "total_bits": 170671104, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9897688627243042, + "total_bits": 173039616, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9907890558242798, + "total_bits": 174398976, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9912906885147095, + "total_bits": 175225856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9921635985374451, + "total_bits": 178728960, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9927007555961609, + "total_bits": 181067776, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9955598711967468, + "total_bits": 219944960, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9960348606109619, + "total_bits": 223010816, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9965972304344177, + "total_bits": 252975104, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9972603917121887, + "total_bits": 265314304, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9983892440795898, + "total_bits": 336861184, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.24.mlp": [ + { + "accuracy": 0.9236878156661987, + "total_bits": 394740800, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9258185625076294, + "total_bits": 408503360, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9371255040168762, + "total_bits": 456551424, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9407856464385986, + "total_bits": 512257024, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9618233442306519, + "total_bits": 577676384, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9648433327674866, + "total_bits": 592151552, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9696035385131836, + "total_bits": 636733536, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9799754023551941, + "total_bits": 728020576, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9818691611289978, + "total_bits": 738755584, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9805850982666016, + "total_bits": 750822496, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9830421805381775, + "total_bits": 765297664, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9901365041732788, + "total_bits": 923968608, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9910632967948914, + "total_bits": 938443776, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9942300319671631, + "total_bits": 1068803168, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9946343898773193, + "total_bits": 1109664000, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9953908920288086, + "total_bits": 1209278720, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9970380067825317, + "total_bits": 1414799616, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.25.self_attn": [ + { + "accuracy": 0.964160680770874, + "total_bits": 89141248, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9656925201416016, + "total_bits": 91697152, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9688834547996521, + "total_bits": 95234560, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9764288067817688, + "total_bits": 111748096, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9811409711837769, + "total_bits": 132388864, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9820259809494019, + "total_bits": 132455936, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9871474504470825, + "total_bits": 169089024, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9882672429084778, + "total_bits": 169221632, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9891765713691711, + "total_bits": 170671104, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9895553588867188, + "total_bits": 173039616, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9909759163856506, + "total_bits": 174398976, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9914993047714233, + "total_bits": 175225856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9922427535057068, + "total_bits": 178728960, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9925106763839722, + "total_bits": 181067776, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9954383969306946, + "total_bits": 219944960, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9959017634391785, + "total_bits": 223010816, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9964326024055481, + "total_bits": 252975104, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9971325397491455, + "total_bits": 265314304, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9981990456581116, + "total_bits": 336861184, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.25.mlp": [ + { + "accuracy": 0.9234814643859863, + "total_bits": 394740800, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9256763458251953, + "total_bits": 408503360, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9366424679756165, + "total_bits": 456551424, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9402738213539124, + "total_bits": 512257024, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9617564082145691, + "total_bits": 577676384, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9647548198699951, + "total_bits": 592151552, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9693777561187744, + "total_bits": 636733536, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9798400402069092, + "total_bits": 728020576, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9817804098129272, + "total_bits": 738755584, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9805455207824707, + "total_bits": 750822496, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9830023646354675, + "total_bits": 765297664, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9901317358016968, + "total_bits": 923968608, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9910440444946289, + "total_bits": 938443776, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9942378997802734, + "total_bits": 1068803168, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9946427941322327, + "total_bits": 1109664000, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9953869581222534, + "total_bits": 1209278720, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9970635771751404, + "total_bits": 1414799616, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.26.self_attn": [ + { + "accuracy": 0.9617835283279419, + "total_bits": 89141248, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9637356996536255, + "total_bits": 91697152, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9670659899711609, + "total_bits": 95234560, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9754918217658997, + "total_bits": 111748096, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9806798100471497, + "total_bits": 132388864, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.981212317943573, + "total_bits": 132455936, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9870488047599792, + "total_bits": 169089024, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9876936674118042, + "total_bits": 169221632, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9887855648994446, + "total_bits": 170671104, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9894344210624695, + "total_bits": 173039616, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9903768301010132, + "total_bits": 174398976, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9909326434135437, + "total_bits": 175225856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9916174411773682, + "total_bits": 178728960, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9922494888305664, + "total_bits": 181067776, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9953054785728455, + "total_bits": 219944960, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9957162141799927, + "total_bits": 223010816, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9963787794113159, + "total_bits": 252975104, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9969689249992371, + "total_bits": 265314304, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9983627200126648, + "total_bits": 336861184, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.26.mlp": [ + { + "accuracy": 0.9232931733131409, + "total_bits": 394740800, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9254870414733887, + "total_bits": 408503360, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9362118244171143, + "total_bits": 456551424, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9397788643836975, + "total_bits": 512257024, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9616162776947021, + "total_bits": 577676384, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9646404981613159, + "total_bits": 592151552, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9691728949546814, + "total_bits": 636733536, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9797552227973938, + "total_bits": 728020576, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9816688299179077, + "total_bits": 738755584, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.980451226234436, + "total_bits": 750822496, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.982917845249176, + "total_bits": 765297664, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9900416135787964, + "total_bits": 923968608, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9909524917602539, + "total_bits": 938443776, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9941202998161316, + "total_bits": 1068803168, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9945303201675415, + "total_bits": 1109664000, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9952455759048462, + "total_bits": 1209278720, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9968776702880859, + "total_bits": 1414799616, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.27.self_attn": [ + { + "accuracy": 0.9637154936790466, + "total_bits": 89141248, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9654697179794312, + "total_bits": 91697152, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.96950763463974, + "total_bits": 95234560, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9775655269622803, + "total_bits": 111748096, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9818878769874573, + "total_bits": 132388864, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9823586940765381, + "total_bits": 132455936, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9880202412605286, + "total_bits": 169089024, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9886980652809143, + "total_bits": 169221632, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.989515483379364, + "total_bits": 170671104, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9898940324783325, + "total_bits": 173039616, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9909783005714417, + "total_bits": 174398976, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9914613962173462, + "total_bits": 175225856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9921662211418152, + "total_bits": 178728960, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.992761492729187, + "total_bits": 181067776, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9956316947937012, + "total_bits": 219944960, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9961001873016357, + "total_bits": 223010816, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9967173933982849, + "total_bits": 252975104, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9974233508110046, + "total_bits": 265314304, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984723925590515, + "total_bits": 336861184, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.27.mlp": [ + { + "accuracy": 0.9232266545295715, + "total_bits": 394740800, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9254940748214722, + "total_bits": 408503360, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9361053705215454, + "total_bits": 456551424, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9397132992744446, + "total_bits": 512257024, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9615546464920044, + "total_bits": 577676384, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9645829200744629, + "total_bits": 592151552, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9690989255905151, + "total_bits": 636733536, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.979605495929718, + "total_bits": 728020576, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.981548011302948, + "total_bits": 738755584, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9803990125656128, + "total_bits": 750822496, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9828706383705139, + "total_bits": 765297664, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9900168180465698, + "total_bits": 923968608, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9909051060676575, + "total_bits": 938443776, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9940760731697083, + "total_bits": 1068803168, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9945307970046997, + "total_bits": 1109664000, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9952475428581238, + "total_bits": 1209278720, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9968824982643127, + "total_bits": 1414799616, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.28.self_attn": [ + { + "accuracy": 0.9570818543434143, + "total_bits": 89141248, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9591004848480225, + "total_bits": 91697152, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9633428454399109, + "total_bits": 95234560, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9733706116676331, + "total_bits": 111748096, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9786383509635925, + "total_bits": 132388864, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9790573716163635, + "total_bits": 132455936, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9861310124397278, + "total_bits": 169089024, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9867442846298218, + "total_bits": 169221632, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9876741766929626, + "total_bits": 170671104, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9884549379348755, + "total_bits": 173039616, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9892730116844177, + "total_bits": 174398976, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9900143146514893, + "total_bits": 175225856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9907545447349548, + "total_bits": 178728960, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9913690686225891, + "total_bits": 181067776, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9949092864990234, + "total_bits": 219944960, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9953032732009888, + "total_bits": 223010816, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9962019920349121, + "total_bits": 252975104, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9967765808105469, + "total_bits": 265314304, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9981407523155212, + "total_bits": 336861184, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.28.mlp": [ + { + "accuracy": 0.9197556972503662, + "total_bits": 394740800, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9221832752227783, + "total_bits": 408503360, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.933716893196106, + "total_bits": 456551424, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9376707673072815, + "total_bits": 512257024, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9598106145858765, + "total_bits": 577676384, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9630042910575867, + "total_bits": 592151552, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9679097533226013, + "total_bits": 636733536, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9785321950912476, + "total_bits": 728020576, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9805989265441895, + "total_bits": 738755584, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9795040488243103, + "total_bits": 750822496, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9820718169212341, + "total_bits": 765297664, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9895117282867432, + "total_bits": 923968608, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.990413248538971, + "total_bits": 938443776, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9936687350273132, + "total_bits": 1068803168, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9941309690475464, + "total_bits": 1109664000, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9948904514312744, + "total_bits": 1209278720, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9964609742164612, + "total_bits": 1414799616, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.29.self_attn": [ + { + "accuracy": 0.9480156898498535, + "total_bits": 89141248, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9503364562988281, + "total_bits": 91697152, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9566341638565063, + "total_bits": 95234560, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9683546423912048, + "total_bits": 111748096, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9738256335258484, + "total_bits": 132388864, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9743095636367798, + "total_bits": 132455936, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9833172559738159, + "total_bits": 169089024, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9840244650840759, + "total_bits": 169221632, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9854320287704468, + "total_bits": 170671104, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9859579205513, + "total_bits": 173039616, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9870347380638123, + "total_bits": 174398976, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9878800511360168, + "total_bits": 175225856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9889588356018066, + "total_bits": 178728960, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.989793598651886, + "total_bits": 181067776, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9938756823539734, + "total_bits": 219944960, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9945396780967712, + "total_bits": 223010816, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.995446503162384, + "total_bits": 252975104, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9963279962539673, + "total_bits": 265314304, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9978930354118347, + "total_bits": 336861184, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.29.mlp": [ + { + "accuracy": 0.9201555252075195, + "total_bits": 394740800, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9225545525550842, + "total_bits": 408503360, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9344040751457214, + "total_bits": 456551424, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9383991360664368, + "total_bits": 512257024, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9598702788352966, + "total_bits": 577676384, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9630522131919861, + "total_bits": 592151552, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9681186079978943, + "total_bits": 636733536, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9784958958625793, + "total_bits": 728020576, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9805541634559631, + "total_bits": 738755584, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9794653654098511, + "total_bits": 750822496, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9819895625114441, + "total_bits": 765297664, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.989483118057251, + "total_bits": 923968608, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9901388883590698, + "total_bits": 938443776, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9935810565948486, + "total_bits": 1068803168, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9941340088844299, + "total_bits": 1109664000, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9949149489402771, + "total_bits": 1209278720, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.996442973613739, + "total_bits": 1414799616, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.30.self_attn": [ + { + "accuracy": 0.9414986968040466, + "total_bits": 89141248, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9445879459381104, + "total_bits": 91697152, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9553024768829346, + "total_bits": 95234560, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9664435982704163, + "total_bits": 111748096, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9736520648002625, + "total_bits": 132388864, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9738694429397583, + "total_bits": 132455936, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9820189476013184, + "total_bits": 169089024, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.982109010219574, + "total_bits": 169221632, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9841391444206238, + "total_bits": 170671104, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9845623970031738, + "total_bits": 173039616, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9867170453071594, + "total_bits": 174398976, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9878985285758972, + "total_bits": 175225856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9888191223144531, + "total_bits": 178728960, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9895944595336914, + "total_bits": 181067776, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.993811845779419, + "total_bits": 219944960, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9942244291305542, + "total_bits": 223010816, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9952475428581238, + "total_bits": 252975104, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9958891272544861, + "total_bits": 265314304, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9975190758705139, + "total_bits": 336861184, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.30.mlp": [ + { + "accuracy": 0.9209880828857422, + "total_bits": 394740800, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9231677055358887, + "total_bits": 408503360, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9347788095474243, + "total_bits": 456551424, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.938624382019043, + "total_bits": 512257024, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9596282839775085, + "total_bits": 577676384, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9629557728767395, + "total_bits": 592151552, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9679611921310425, + "total_bits": 636733536, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9778137803077698, + "total_bits": 728020576, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9801861047744751, + "total_bits": 738755584, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9791034460067749, + "total_bits": 750822496, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9815981984138489, + "total_bits": 765297664, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.988993763923645, + "total_bits": 923968608, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, 5 ], "bits_prop": [ - 1.0 + 0.05, + 0.1, + 0.85 ], "scale_bits": 4 } - } - }, - { - "key": "model.layers.18.mlp.gate_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.1374925822019577, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.12856176495552063, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.12479405850172043, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.11346160620450974, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.06476005166769028, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.06128024309873581, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.07391566038131714, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.06805061548948288, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.0655723512172699, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.05788882449269295, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.055533647537231445, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.037744469940662384, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.032701749354600906, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.031223267316818237, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.030886035412549973, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.01892118714749813, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.016427485272288322, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.01613978110253811, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.015078910626471043, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.014863798394799232, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.010164116509258747, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.01053904090076685, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.00968207512050867, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.007554063107818365, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.032701749354600906, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.032701749354600906, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 + }, + { + "accuracy": 0.9894065260887146, + "total_bits": 938443776, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9927865862846375, + "total_bits": 1068803168, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9934835433959961, + "total_bits": 1109664000, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9942030906677246, + "total_bits": 1209278720, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9954591989517212, + "total_bits": 1414799616, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 ], "scale_bits": 4 } } - }, - { - "key": "model.layers.18.mlp.up_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.21820107102394104, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.2050238996744156, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.2001328021287918, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.18214218318462372, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.10294309258460999, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.09807790815830231, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.11611451208591461, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.10672638565301895, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.10402285307645798, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.09228337556123734, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.0885280892252922, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.05919875577092171, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.05111004412174225, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.049416717141866684, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.04901818186044693, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.029674813151359558, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.025537021458148956, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.025165516883134842, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.023387964814901352, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.023134944960474968, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.01580570638179779, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.015639670193195343, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.015234248712658882, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.010530409403145313, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.029674813151359558, - "qparams": { - "group_size": 128, + ], + "model.layers.31.self_attn": [ + { + "accuracy": 0.9493929147720337, + "total_bits": 89141248, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9516201019287109, + "total_bits": 91697152, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9574496746063232, + "total_bits": 95234560, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9710063934326172, + "total_bits": 111748096, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, "bits": [ - 5 + 3, + 2 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.029674813151359558, - "qparams": { - "group_size": 128, + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, "bits": [ - 5 + 4, + 3 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.18.mlp.down_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1789376395089284, - "total_bits": 127947775.99999999, - "err": 0.21774938702583313, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 139482112.0, - "err": 0.18709535896778107, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 154162176.0, - "err": 0.17304351925849915, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7235804966517856, - "total_bits": 159929344.0, - "err": 0.15223371982574463, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2235804966517856, - "total_bits": 189289472.0, - "err": 0.09874185174703598, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7146519252232144, - "total_bits": 218125312.0, - "err": 0.08579084277153015, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 178001152.0, - "err": 0.12437142431735992, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 183522304.0, - "err": 0.11183854192495346, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1789376395089284, - "total_bits": 186668032.0, - "err": 0.1029086783528328, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5271519252232144, - "total_bits": 207115264.0, - "err": 0.08156735450029373, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6608973911830356, - "total_bits": 214968832.0, - "err": 0.07802098244428635, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 236721408.0, - "err": 0.06372956186532974, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 242242560.0, - "err": 0.054010625928640366, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.223580496651786, - "total_bits": 248009728.0, - "err": 0.048016518354415894, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.321794782366071, - "total_bits": 253776896.0, - "err": 0.04651014506816864, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.032429859042167664, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.223580496651786, - "total_bits": 306729984.0, - "err": 0.02599930763244629, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.339651925223214, - "total_bits": 313545728.0, - "err": 0.025272857397794724, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.527151925223214, - "total_bits": 324555776.0, - "err": 0.022808833047747612, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.723580496651786, - "total_bits": 336090112.0, - "err": 0.021856937557458878, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 354161920.0, - "err": 0.018173228949308395, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 359683072.0, - "err": 0.018188631162047386, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.227770124162946, - "total_bits": 365696256.0, - "err": 0.01621237024664879, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 477123584.0, - "err": 0.013586694374680519, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.032429859042167664, - "qparams": { - "group_size": 128, + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, "bits": [ - 5 + 4, + 3 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.032429859042167664, - "qparams": { - "group_size": 128, + { + "accuracy": 0.9757025241851807, + "total_bits": 132388864, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, "bits": [ - 5 + 4, + 3 ], "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.19.self_attn.q_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.11348071694374084, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.10485699772834778, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.09927164763212204, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.08967964351177216, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.052964452654123306, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.04859105497598648, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.0651138424873352, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.05929850786924362, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.053840044885873795, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.04684876278042793, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.04512515664100647, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.03315485268831253, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.028377003967761993, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.02553233504295349, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.024815890938043594, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.016622181981801987, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.013344730250537395, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.012850245460867882, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.012132370844483376, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.011687414720654488, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.00880383886396885, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.008731470443308353, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.007788805291056633, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.005928258411586285, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.03315485268831253, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.03315485268831253, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.19.self_attn.k_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.08765597641468048, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.08026088774204254, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.07328673452138901, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.06635505706071854, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.04041486233472824, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.035504478961229324, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.053729843348264694, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.04906598851084709, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.041244473308324814, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.03572402149438858, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.03478245809674263, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.027216903865337372, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.023352622985839844, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.019497154280543327, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.01845386251807213, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.013626028783619404, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.010153229348361492, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.009492410346865654, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.009221634827554226, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.008579310029745102, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.007103398907929659, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.006917467340826988, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.005775098223239183, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.004447015002369881, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.03572402149438858, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.03572402149438858, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.19.self_attn.v_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.213727205991745, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.19518202543258667, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.18678595125675201, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.16899199783802032, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.09962575137615204, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.09181156009435654, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.11944492161273956, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.10759676992893219, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.10155556350946426, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.08709283918142319, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.0837373286485672, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.060877613723278046, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.05135972052812576, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.04771966114640236, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.0468452125787735, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.03042612411081791, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.024326970800757408, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.023607786744832993, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.02175804227590561, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.021180760115385056, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.015592603012919426, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.01481527741998434, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.013961946591734886, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.009088855236768723, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.03042612411081791, - "qparams": { - "group_size": 128, + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, "bits": [ - 5 + 4, + 3 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.03042612411081791, - "qparams": { - "group_size": 128, + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, "bits": [ - 5 + 4, + 3 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.19.self_attn.o_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.21072103083133698, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.19164325296878815, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.18523627519607544, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.15968374907970428, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.09862709790468216, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.09181611984968185, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.11384426057338715, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.10272415727376938, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.0999821349978447, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.08301432430744171, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.0773761197924614, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.058739073574543, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.049972131848335266, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.04812727868556976, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.047716911882162094, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.02968384511768818, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.026120854541659355, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.025701461359858513, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.023201219737529755, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.022945940494537354, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.016584079712629318, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.01772530935704708, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.016010992228984833, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.013814602978527546, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.02968384511768818, - "qparams": { - "group_size": 128, + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, "bits": [ - 5 + 4, + 3 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.02968384511768818, - "qparams": { - "group_size": 128, + { + "accuracy": 0.9761213660240173, + "total_bits": 132455936, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, "bits": [ - 5 + 4, + 3 ], "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.19.mlp.gate_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.13333599269390106, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.12472177296876907, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.1212131455540657, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.11031914502382278, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.06281222403049469, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.059456486254930496, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.07148183882236481, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.0657590851187706, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.06354662775993347, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.056148041039705276, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.05381622165441513, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.03649694100022316, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.0315772108733654, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.030252262949943542, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.029936211183667183, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.01829606480896473, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.015857409685850143, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.015588946640491486, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.014544716104865074, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.014346062205731869, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.009828917682170868, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.010083322413265705, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.009387016296386719, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.0071491156704723835, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.03649694100022316, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.03649694100022316, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.19.mlp.up_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.2150549441576004, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.20201581716537476, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.197183758020401, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.1795477420091629, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.10147202014923096, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.09667687118053436, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.11435475200414658, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.10517162084579468, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.10254520922899246, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.09100963920354843, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.08723907917737961, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.05837218463420868, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.0504077784717083, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.04876577481627464, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.04837408289313316, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.029280023649334908, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.02531832829117775, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.024957459419965744, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.023218337446451187, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.022976620122790337, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.01573902741074562, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.015674691647291183, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.015189528465270996, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.010784424841403961, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.029280023649334908, - "qparams": { - "group_size": 128, + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, "bits": [ - 5 + 4, + 3 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.029280023649334908, - "qparams": { - "group_size": 128, + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, "bits": [ - 5 + 4, + 3 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.19.mlp.down_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1789376395089284, - "total_bits": 127947775.99999999, - "err": 0.20603756606578827, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 139482112.0, - "err": 0.1764575093984604, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 154162176.0, - "err": 0.16282005608081818, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7235804966517856, - "total_bits": 159929344.0, - "err": 0.14249272644519806, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2235804966517856, - "total_bits": 189289472.0, - "err": 0.09367168694734573, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7146519252232144, - "total_bits": 218125312.0, - "err": 0.08080990612506866, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 178001152.0, - "err": 0.11757660657167435, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 183522304.0, - "err": 0.1060422733426094, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1789376395089284, - "total_bits": 186668032.0, - "err": 0.09756892174482346, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5271519252232144, - "total_bits": 207115264.0, - "err": 0.07660102844238281, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6608973911830356, - "total_bits": 214968832.0, - "err": 0.07307349145412445, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 236721408.0, - "err": 0.060241784900426865, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 242242560.0, - "err": 0.05134331062436104, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.223580496651786, - "total_bits": 248009728.0, - "err": 0.045644547790288925, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.321794782366071, - "total_bits": 253776896.0, - "err": 0.044198233634233475, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.030730711296200752, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.223580496651786, - "total_bits": 306729984.0, - "err": 0.024832414463162422, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.339651925223214, - "total_bits": 313545728.0, - "err": 0.024130448698997498, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.527151925223214, - "total_bits": 324555776.0, - "err": 0.02167022041976452, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.723580496651786, - "total_bits": 336090112.0, - "err": 0.02074979990720749, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 354161920.0, - "err": 0.017181050032377243, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 359683072.0, - "err": 0.017484847456216812, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.227770124162946, - "total_bits": 365696256.0, - "err": 0.01529442798346281, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 477123584.0, - "err": 0.01317636389285326, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.030730711296200752, - "qparams": { - "group_size": 128, + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, "bits": [ - 5 + 4, + 3 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.030730711296200752, - "qparams": { - "group_size": 128, + { + "accuracy": 0.9848417043685913, + "total_bits": 169089024, + "q_proj": { + "group_size": { + "4": 128 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.20.self_attn.q_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.11145150661468506, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.10248727351427078, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.09592404961585999, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.0870819091796875, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.05188622698187828, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.04686010628938675, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.06590161472558975, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.05976508557796478, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.052841443568468094, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.04592224210500717, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.04454243183135986, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.033563002943992615, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.02859276719391346, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.025064658373594284, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.02416958473622799, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.01683853752911091, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.01322048157453537, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.012622852809727192, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.012060030363500118, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.01149842981249094, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.008958867751061916, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.008922530338168144, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.007689627818763256, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.0061478158459067345, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.033563002943992615, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.033563002943992615, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.20.self_attn.k_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.08810478448867798, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.07972514629364014, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.07121668756008148, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.0645846500992775, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.04026168957352638, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.03439608961343765, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.056055158376693726, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.05080990865826607, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.0412951298058033, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.03542720526456833, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.03489105775952339, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.028327463194727898, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.02413969673216343, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.01947706937789917, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.018184294924139977, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.014177348464727402, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.010193624533712864, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.009378173388540745, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.00924221333116293, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.00843286607414484, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.007387418299913406, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.007148140575736761, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.005739053711295128, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.004577990621328354, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.03542720526456833, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.03542720526456833, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 + "bits_prop": [ + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.20.self_attn.v_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.2065197229385376, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.18589352071285248, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.17601345479488373, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.15899674594402313, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.09527849406003952, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.08633553236722946, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.11670342087745667, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.10510382056236267, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.09752252697944641, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.08249236643314362, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.07950017601251602, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.059504181146621704, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.05009802430868149, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.04564103111624718, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.04456046596169472, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.02978871949017048, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.02340107411146164, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.02251719869673252, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.02076934464275837, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.020043469965457916, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.015314987860620022, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.01455624494701624, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.013382150791585445, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.009056160226464272, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.02978871949017048, - "qparams": { - "group_size": 128, + }, + "v_proj": { + "group_size": { + "4": 128 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.02978871949017048, - "qparams": { - "group_size": 128, + }, + "o_proj": { + "group_size": { + "4": 128 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 } - } - }, - { - "key": "model.layers.20.self_attn.o_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.2231989949941635, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.1950334906578064, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.18534623086452484, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.1579456627368927, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.10404032468795776, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.09319466352462769, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.12111028283834457, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.10996023565530777, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.10618181526660919, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.08333656191825867, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.07730479538440704, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.06220477819442749, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.05311651900410652, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.05038749799132347, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.04975041002035141, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.031269967555999756, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.02683446742594242, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.026136592030525208, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.02285096049308777, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.02242124080657959, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.0169711634516716, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.01767570525407791, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.016109727323055267, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.01306991558521986, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.031269967555999756, - "qparams": { - "group_size": 128, + }, + { + "accuracy": 0.9853988289833069, + "total_bits": 169221632, + "q_proj": { + "group_size": { + "4": 128 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.031269967555999756, - "qparams": { - "group_size": 128, + }, + "k_proj": { + "group_size": { + "4": 128 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.20.mlp.gate_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.13850483298301697, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.1299854964017868, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.1266874074935913, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.11532513052225113, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.06529020518064499, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.062107622623443604, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.07358164340257645, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.06784595549106598, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.06597427278757095, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.05851830542087555, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.05602255091071129, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.037503842264413834, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.032528575509786606, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.03137967735528946, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.03110780380666256, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.018771318718791008, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.016340011730790138, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.01609988510608673, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.014991150237619877, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.014827406033873558, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.009988533332943916, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.010205789469182491, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.009603033773601055, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.007098253816366196, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.032528575509786606, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.032528575509786606, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.20.mlp.up_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.22509276866912842, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.21184778213500977, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.2072385847568512, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.18868938088417053, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.10608498752117157, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.1013677716255188, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.11876387149095535, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.10936794430017471, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.10710248351097107, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.09528863430023193, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.09126334637403488, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.060460351407527924, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.05231751501560211, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.05084937438368797, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.05049937218427658, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.030301330611109734, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.026152484118938446, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.02581752836704254, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.02396243065595627, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.023744288831949234, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.01610422693192959, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.015796886757016182, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.015618089586496353, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.010458653792738914, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.030301330611109734, - "qparams": { - "group_size": 128, + }, + "o_proj": { + "group_size": { + "4": 128 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.030301330611109734, - "qparams": { - "group_size": 128, + { + "accuracy": 0.9865517020225525, + "total_bits": 170671104, + "q_proj": { + "group_size": { + "4": 64 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.20.mlp.down_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1789376395089284, - "total_bits": 127947775.99999999, - "err": 0.2049972116947174, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 139482112.0, - "err": 0.17725136876106262, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 154162176.0, - "err": 0.16460539400577545, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7235804966517856, - "total_bits": 159929344.0, - "err": 0.14417903125286102, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2235804966517856, - "total_bits": 189289472.0, - "err": 0.09315503388643265, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7146519252232144, - "total_bits": 218125312.0, - "err": 0.08132906258106232, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 178001152.0, - "err": 0.11514084786176682, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 183522304.0, - "err": 0.10463745892047882, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1789376395089284, - "total_bits": 186668032.0, - "err": 0.0967787429690361, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5271519252232144, - "total_bits": 207115264.0, - "err": 0.07676662504673004, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6608973911830356, - "total_bits": 214968832.0, - "err": 0.07312503457069397, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 236721408.0, - "err": 0.058990053832530975, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 242242560.0, - "err": 0.05034821480512619, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.223580496651786, - "total_bits": 248009728.0, - "err": 0.04508254677057266, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.321794782366071, - "total_bits": 253776896.0, - "err": 0.043754179030656815, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.029934721067547798, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.223580496651786, - "total_bits": 306729984.0, - "err": 0.024024218320846558, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.339651925223214, - "total_bits": 313545728.0, - "err": 0.023388350382447243, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.527151925223214, - "total_bits": 324555776.0, - "err": 0.02090371772646904, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.723580496651786, - "total_bits": 336090112.0, - "err": 0.02004752866923809, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 354161920.0, - "err": 0.01661510579288006, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 359683072.0, - "err": 0.01632767543196678, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.227770124162946, - "total_bits": 365696256.0, - "err": 0.014917455613613129, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 477123584.0, - "err": 0.011773091740906239, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.029934721067547798, - "qparams": { - "group_size": 128, + }, + "k_proj": { + "group_size": { + "4": 64 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.029934721067547798, - "qparams": { - "group_size": 128, + }, + "v_proj": { + "group_size": { + "4": 32 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.21.self_attn.q_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.10752540081739426, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.09820748120546341, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.09161379933357239, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.08317483961582184, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.049986813217401505, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.044869787991046906, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.0637698769569397, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.05774407461285591, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.051005467772483826, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.043965332210063934, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.042702145874500275, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.03252435848116875, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.027589209377765656, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.02410547249019146, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.023227619007229805, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.016308557242155075, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.01264879759401083, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.012053512036800385, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.011471735313534737, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.010914156213402748, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.008688269183039665, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.008471256121993065, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.007440335117280483, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.005733801051974297, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.03252435848116875, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.03252435848116875, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.21.self_attn.k_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.08935713022947311, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.08071020990610123, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.07233186066150665, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.0656433254480362, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.041012004017829895, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.035102859139442444, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.05660369619727135, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.051309265196323395, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.04209904000163078, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.035962846130132675, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.03539950028061867, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.028698453679680824, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.02444744110107422, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.019814874976873398, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.018579527735710144, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.014320294372737408, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.010405796580016613, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.009595191106200218, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.009420125745236874, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.008616045117378235, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.007428687997162342, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.007291777990758419, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.005765073467046022, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.004715076647698879, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.035962846130132675, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.035962846130132675, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 + "bits_prop": [ + 1 ], "scale_bits": 4 } - } - }, - { - "key": "model.layers.21.self_attn.v_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.19867196679115295, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.17716288566589355, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.1659591645002365, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.1497502326965332, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.09109504520893097, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.0812193974852562, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.11444253474473953, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.10284264385700226, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.09365789592266083, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.07831533998250961, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.07573531568050385, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.058588556945323944, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.04901302233338356, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.04367604851722717, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.04231667518615723, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.029300549998879433, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.022370539605617523, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.021360304206609726, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.019776323810219765, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.01890101470053196, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.01503958273679018, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.014141662046313286, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.012720420025289059, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.008749890141189098, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.029300549998879433, - "qparams": { - "group_size": 128, + }, + { + "accuracy": 0.9870437979698181, + "total_bits": 173039616, + "q_proj": { + "group_size": { + "4": 32 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.029300549998879433, - "qparams": { - "group_size": 128, + }, + "k_proj": { + "group_size": { + "4": 32 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.21.self_attn.o_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.2297431230545044, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.20255042612552643, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.192813903093338, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.16538387537002563, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.10717587172985077, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.0966724157333374, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.12322276830673218, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.11352111399173737, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.1090894490480423, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.08621813356876373, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.08095244318246841, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.06321549415588379, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.054554518312215805, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.0516405887901783, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.050931502133607864, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.03175421059131622, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.02703980728983879, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.02634424902498722, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.022907203063368797, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.022426793351769447, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.01698097214102745, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.017267538234591484, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.01601741462945938, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.012122386135160923, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.03175421059131622, - "qparams": { - "group_size": 128, + }, + "v_proj": { + "group_size": { + "4": 32 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.03175421059131622, - "qparams": { - "group_size": 128, + }, + "o_proj": { + "group_size": { + "4": 32 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 } - } - }, - { - "key": "model.layers.21.mlp.gate_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.13743199408054352, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.12928615510463715, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.12621335685253143, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.11484409123659134, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.06485365331172943, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.061827994883060455, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.07282441854476929, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.06715486198663712, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.0654919371008873, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.05818810313940048, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.05577061325311661, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.0371539369225502, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.03219684213399887, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.031166253611445427, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.030919261276721954, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.0186143908649683, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.01619957946240902, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.015973703935742378, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.014865946024656296, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.014712884090840816, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.009940817952156067, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.010057127103209496, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.009608233347535133, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.006951375398784876, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.0371539369225502, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.0371539369225502, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 + }, + { + "accuracy": 0.9880205988883972, + "total_bits": 174398976, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.21.mlp.up_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.23213404417037964, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.21883858740329742, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.2142142355442047, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.19518107175827026, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.10960764437913895, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.10488921403884888, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.1223391741514206, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.1126762107014656, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.11059151589870453, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.09849753975868225, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.09439245611429214, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.06240826100111008, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.053920477628707886, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.05254124104976654, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.052217189222574234, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03125707805156708, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.0270486269146204, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.026717962697148323, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.02479887567460537, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.024591026827692986, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.016671197488904, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.016344362869858742, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.016217805445194244, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.010860339738428593, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03125707805156708, - "qparams": { - "group_size": 128, + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, "bits": [ - 5 + 5, + 4 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03125707805156708, - "qparams": { - "group_size": 128, + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, "bits": [ - 5 + 5, + 4 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.21.mlp.down_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1789376395089284, - "total_bits": 127947775.99999999, - "err": 0.2039477676153183, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 139482112.0, - "err": 0.1764291673898697, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 154162176.0, - "err": 0.16383416950702667, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7235804966517856, - "total_bits": 159929344.0, - "err": 0.14422303438186646, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2235804966517856, - "total_bits": 189289472.0, - "err": 0.09244544804096222, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7146519252232144, - "total_bits": 218125312.0, - "err": 0.08070018887519836, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 178001152.0, - "err": 0.11477522552013397, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 183522304.0, - "err": 0.10414296388626099, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1789376395089284, - "total_bits": 186668032.0, - "err": 0.09612568467855453, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5271519252232144, - "total_bits": 207115264.0, - "err": 0.07652217894792557, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6608973911830356, - "total_bits": 214968832.0, - "err": 0.07291590422391891, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 236721408.0, - "err": 0.05821726471185684, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 242242560.0, - "err": 0.04999297112226486, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.223580496651786, - "total_bits": 248009728.0, - "err": 0.044703345745801926, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.321794782366071, - "total_bits": 253776896.0, - "err": 0.043363459408283234, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.029615314677357674, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.223580496651786, - "total_bits": 306729984.0, - "err": 0.023752113804221153, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.339651925223214, - "total_bits": 313545728.0, - "err": 0.023103181272745132, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.527151925223214, - "total_bits": 324555776.0, - "err": 0.020697688683867455, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.723580496651786, - "total_bits": 336090112.0, - "err": 0.019826458767056465, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 354161920.0, - "err": 0.016504283994436264, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 359683072.0, - "err": 0.016057612374424934, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.227770124162946, - "total_bits": 365696256.0, - "err": 0.01483475137501955, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 477123584.0, - "err": 0.011453576385974884, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.029615314677357674, - "qparams": { - "group_size": 128, + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, "bits": [ - 5 + 5, + 4 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.029615314677357674, - "qparams": { - "group_size": 128, + { + "accuracy": 0.9884731769561768, + "total_bits": 175225856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, "bits": [ - 5 + 5, + 4 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.22.self_attn.q_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.10887125134468079, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.09895331412553787, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.09206091612577438, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.08349825441837311, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.05046651139855385, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.04507838189601898, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.06463630497455597, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.058600086718797684, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.05166810005903244, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.04429738596081734, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.04311756789684296, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.03298630565404892, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.02805054560303688, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.024369899183511734, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.023433296009898186, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.01655702479183674, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.012811687774956226, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.012184634804725647, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.011602586135268211, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.011009839363396168, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.008790744468569756, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.008638076484203339, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.0074527692049741745, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.005869836546480656, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.03298630565404892, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.03298630565404892, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.22.self_attn.k_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.09703758358955383, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.08759009838104248, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.07863683253526688, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.07140514254570007, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.04460924491286278, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.03817092254757881, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.06141945347189903, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.055681467056274414, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.045788127928972244, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.03909993916749954, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.038423970341682434, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.0311527568846941, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.026532338932156563, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.021568994969129562, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.02024623565375805, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.015576127916574478, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.011344915255904198, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.010487147606909275, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.0102564487606287, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.009422294795513153, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.008127609267830849, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.007944888435304165, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.006381353363394737, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.005204093176871538, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.0311527568846941, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.0311527568846941, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.22.self_attn.v_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.19817571341991425, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.17582307755947113, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.16350750625133514, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.1474759727716446, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.09068059921264648, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.08009342849254608, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.11541128158569336, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.10360980033874512, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.09331444650888443, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.07763168215751648, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.07519368082284927, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.05889742076396942, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.049367330968379974, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.04348165541887283, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.04197990521788597, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.02937602810561657, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.022300440818071365, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.02119162678718567, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.019668197259306908, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.01869206875562668, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.015072188340127468, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.014248315244913101, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.012646478600800037, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.008778486400842667, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.02937602810561657, - "qparams": { - "group_size": 128, + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, "bits": [ - 5 + 5, + 4 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.02937602810561657, - "qparams": { - "group_size": 128, + { + "accuracy": 0.9894307851791382, + "total_bits": 178728960, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, "bits": [ - 5 + 5, + 4 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.22.self_attn.o_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.2114296853542328, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.1846105307340622, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.17474260926246643, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.14644183218479156, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.09842614084482193, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.08829297870397568, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.11536823958158493, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.10548406094312668, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.10080424696207047, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.07873178273439407, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.07109116017818451, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.059156663715839386, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.050751373171806335, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.047620318830013275, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.046851035207509995, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.02967168018221855, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.02512362226843834, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.024415910243988037, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.021298250183463097, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.020783057436347008, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.015968099236488342, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.01638237200677395, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.01494449283927679, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.011762101203203201, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.02967168018221855, - "qparams": { - "group_size": 128, + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, "bits": [ - 5 + 5, + 4 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.02967168018221855, - "qparams": { - "group_size": 128, + }, + "v_proj": { + "group_size": { + "5": 64 + }, "bits": [ 5 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.22.mlp.gate_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.14869388937950134, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.13983093202114105, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.13651084899902344, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.12431470304727554, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.07014378160238266, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.06688543409109116, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.07853951305150986, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.07260740548372269, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.07083979994058609, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.06294090300798416, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.060229960829019547, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.039972517639398575, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.034752488136291504, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.033650755882263184, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.033394839614629745, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.01998285762965679, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.017393112182617188, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.017153045162558556, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.015941161662340164, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.015776272863149643, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.010508591309189796, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.010659032501280308, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.010142249055206776, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.007201538886874914, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.034752488136291504, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.034752488136291504, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.22.mlp.up_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.23560725152492523, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.2220294028520584, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.2173743098974228, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.19795629382133484, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.11115945875644684, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.10636626929044724, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.12364494800567627, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.11430145800113678, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.1121913492679596, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.09990733116865158, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.09554319083690643, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.06289876252412796, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.0546375997364521, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.05322597175836563, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.052886687219142914, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03140754997730255, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.02727108635008335, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.02694154717028141, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.024971354752779007, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.024759752675890923, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.016375083476305008, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.01630396582186222, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.015906307846307755, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.010604530572891235, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03140754997730255, - "qparams": { - "group_size": 128, + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, "bits": [ - 5 + 5, + 4 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03140754997730255, - "qparams": { - "group_size": 128, + { + "accuracy": 0.9901012182235718, + "total_bits": 181067776, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, "bits": [ - 5 + 5, + 4 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.22.mlp.down_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1789376395089284, - "total_bits": 127947775.99999999, - "err": 0.2070186287164688, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 139482112.0, - "err": 0.18073104321956635, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 154162176.0, - "err": 0.16828757524490356, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7235804966517856, - "total_bits": 159929344.0, - "err": 0.1488325446844101, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2235804966517856, - "total_bits": 189289472.0, - "err": 0.09417074173688889, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7146519252232144, - "total_bits": 218125312.0, - "err": 0.0827794000506401, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 178001152.0, - "err": 0.1170724630355835, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 183522304.0, - "err": 0.10603858530521393, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1789376395089284, - "total_bits": 186668032.0, - "err": 0.09765051305294037, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5271519252232144, - "total_bits": 207115264.0, - "err": 0.07878981530666351, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6608973911830356, - "total_bits": 214968832.0, - "err": 0.07526940852403641, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 236721408.0, - "err": 0.05945669487118721, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 242242560.0, - "err": 0.05085906758904457, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.223580496651786, - "total_bits": 248009728.0, - "err": 0.045518264174461365, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.321794782366071, - "total_bits": 253776896.0, - "err": 0.04416438564658165, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.030203409492969513, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.223580496651786, - "total_bits": 306729984.0, - "err": 0.02412850223481655, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.339651925223214, - "total_bits": 313545728.0, - "err": 0.023469973355531693, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.527151925223214, - "total_bits": 324555776.0, - "err": 0.021173017099499702, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.723580496651786, - "total_bits": 336090112.0, - "err": 0.020304733887314796, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 354161920.0, - "err": 0.016762491315603256, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 359683072.0, - "err": 0.016248896718025208, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.227770124162946, - "total_bits": 365696256.0, - "err": 0.015035535208880901, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 477123584.0, - "err": 0.011532209813594818, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.030203409492969513, - "qparams": { - "group_size": 128, + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, "bits": [ - 5 + 5, + 4 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.030203409492969513, - "qparams": { - "group_size": 128, + }, + "v_proj": { + "group_size": { + "5": 32 + }, "bits": [ 5 ], "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.23.self_attn.q_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.11033614724874496, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.1008068174123764, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.09427118301391602, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.08553474396467209, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.051231175661087036, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.04607938975095749, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.0648651123046875, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.05893447622656822, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.052326980978250504, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.0450957790017128, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.043725330382585526, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.03303619101643562, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.028146570548415184, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.024680154398083687, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.023802848532795906, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.016542844474315643, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.012873167172074318, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.012276043184101582, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.011656645685434341, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.011095772497355938, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.008745132945477962, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.008514080196619034, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.007497869431972504, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.005638442002236843, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.03303619101643562, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.03303619101643562, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.23.self_attn.k_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.09463310241699219, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.08537431806325912, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.07765500992536545, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.07039429247379303, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.04347987473011017, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.03773343190550804, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.058076731860637665, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.05279702693223953, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.04466409608721733, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.03802194818854332, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.03715066984295845, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.029483992606401443, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.025175178423523903, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.0209710281342268, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.019870685413479805, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.014706261456012726, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.010959416627883911, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.010238802060484886, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.009885938838124275, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.009182765148580074, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.007674941327422857, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.007493103854358196, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.0061971405521035194, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.004863188602030277, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.03715066984295845, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.03715066984295845, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.23.self_attn.v_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.21392899751663208, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.19318003952503204, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.18311117589473724, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.1654493808746338, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.0988866314291954, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.08980023860931396, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.12030774354934692, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.10899737477302551, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.10119953751564026, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.08588667213916779, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.0826672613620758, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.06127629056572914, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.05203637853264809, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.047376297414302826, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.04622315242886543, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.030594894662499428, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.024228589609265327, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.023340411484241486, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.02158379554748535, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.020823679864406586, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.015705494210124016, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.015054907649755478, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.013820003718137741, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.009314456954598427, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.030594894662499428, - "qparams": { - "group_size": 128, + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, "bits": [ - 5 + 5, + 4 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.030594894662499428, - "qparams": { - "group_size": 128, + { + "accuracy": 0.9937602877616882, + "total_bits": 219944960, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, "bits": [ + 6, 5 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.23.self_attn.o_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.191518172621727, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.17238803207874298, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.16627582907676697, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.14128530025482178, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.08897203207015991, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.08253679424524307, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.101402647793293, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.09297599643468857, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.09076115489006042, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.0737227350473404, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.06745781004428864, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.051927562803030014, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.04469252750277519, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.04286788031458855, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.042428985238075256, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.02600167877972126, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.02242804691195488, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.021968388929963112, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.019428348168730736, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.019125627353787422, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.013927510008215904, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.014203673228621483, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.013336142525076866, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.01002187468111515, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.02600167877972126, - "qparams": { - "group_size": 128, + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, "bits": [ + 6, 5 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.02600167877972126, - "qparams": { - "group_size": 128, + }, + "v_proj": { + "group_size": { + "6": 128 + }, "bits": [ - 5 + 6 ], "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.23.mlp.gate_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.1522519439458847, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.14312292635440826, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.13978557288646698, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.12723608314990997, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.07179582864046097, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.06847603619098663, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.08049057424068451, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.0742296576499939, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.07250355184078217, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.06437437236309052, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.06163317337632179, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.040965016931295395, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.03556331619620323, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.0344674251973629, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.03420282155275345, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.020556697621941566, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.017854755744338036, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.017613422125577927, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.0163688026368618, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.016200734302401543, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.010975107550621033, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.01099309977144003, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.01062404178082943, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.007505943067371845, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.03556331619620323, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.03556331619620323, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.23.mlp.up_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.23843015730381012, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.2245674729347229, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.21990223228931427, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.2000853717327118, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.11252325028181076, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.1076202467083931, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.12516072392463684, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.11566879600286484, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.11354996263980865, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.10096662491559982, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.09660136699676514, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.06375692039728165, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.055307965725660324, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.05387895181775093, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.05355052277445793, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03184676170349121, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.027599534019827843, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.027259759604930878, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.025240318849682808, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.025029702112078667, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.016708429902791977, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.016473570838570595, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.016234183683991432, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.010681196115911007, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03184676170349121, - "qparams": { - "group_size": 128, + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, "bits": [ + 6, 5 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03184676170349121, - "qparams": { - "group_size": 128, + { + "accuracy": 0.9932650923728943, + "total_bits": 223010816, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, "bits": [ + 6, 5 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.23.mlp.down_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1789376395089284, - "total_bits": 127947775.99999999, - "err": 0.2095024436712265, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 139482112.0, - "err": 0.1822015345096588, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 154162176.0, - "err": 0.16931110620498657, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7235804966517856, - "total_bits": 159929344.0, - "err": 0.15046177804470062, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2235804966517856, - "total_bits": 189289472.0, - "err": 0.09523127228021622, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7146519252232144, - "total_bits": 218125312.0, - "err": 0.08331472426652908, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 178001152.0, - "err": 0.11715640872716904, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 183522304.0, - "err": 0.10746320337057114, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1789376395089284, - "total_bits": 186668032.0, - "err": 0.09888722002506256, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5271519252232144, - "total_bits": 207115264.0, - "err": 0.07966334372758865, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6608973911830356, - "total_bits": 214968832.0, - "err": 0.07618533074855804, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 236721408.0, - "err": 0.05979299545288086, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 242242560.0, - "err": 0.0516459085047245, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.223580496651786, - "total_bits": 248009728.0, - "err": 0.04609519988298416, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.321794782366071, - "total_bits": 253776896.0, - "err": 0.04468374326825142, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.030183615162968636, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.223580496651786, - "total_bits": 306729984.0, - "err": 0.02454289048910141, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.339651925223214, - "total_bits": 313545728.0, - "err": 0.023851068690419197, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.527151925223214, - "total_bits": 324555776.0, - "err": 0.021566642448306084, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.723580496651786, - "total_bits": 336090112.0, - "err": 0.020664218813180923, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 354161920.0, - "err": 0.016300788149237633, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 359683072.0, - "err": 0.0166681669652462, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.227770124162946, - "total_bits": 365696256.0, - "err": 0.01443600282073021, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 477123584.0, - "err": 0.011945411562919617, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.030183615162968636, - "qparams": { - "group_size": 128, + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, "bits": [ + 6, 5 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.030183615162968636, - "qparams": { - "group_size": 128, + }, + "v_proj": { + "group_size": { + "6": 32 + }, "bits": [ - 5 + 6 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.24.self_attn.q_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.11452885717153549, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.10511500388383865, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.09852664172649384, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.08939206600189209, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.05336076766252518, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.0481768436729908, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.06717633455991745, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.06118921563029289, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.05439314618706703, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.04708298668265343, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.04572339355945587, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.03420671820640564, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.029268991202116013, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.0257338248193264, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.024838577955961227, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.01712592877447605, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.013466998934745789, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.012863763608038425, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.012226213701069355, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.011664291843771935, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.00900645088404417, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.008946648798882961, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.007750204298645258, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.006020393688231707, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.03420671820640564, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.03420671820640564, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.24.self_attn.k_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.10095615684986115, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.09199374914169312, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.08344033360481262, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.07586381584405899, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.04661133140325546, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.04055570438504219, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.06282944232225418, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.057142630219459534, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.047691404819488525, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.04113065451383591, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.040329836308956146, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.03189454227685928, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.027258001267910004, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.022484073415398598, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.02123258449137211, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.015965528786182404, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.011740919202566147, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.010931235738098621, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.010663202032446861, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.009858007542788982, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.008315951563417912, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.008078688755631447, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.00664173997938633, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.00519496900960803, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.03189454227685928, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.03189454227685928, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.24.self_attn.v_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.22301465272903442, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.2035807967185974, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.19461137056350708, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.17615163326263428, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.10369613766670227, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.09543643146753311, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.1239500567317009, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.1123971939086914, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.10573695600032806, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.09073670208454132, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.0871751606464386, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.06304847449064255, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.053638897836208344, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.049659766256809235, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.048671700060367584, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.031463708728551865, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.025329628959298134, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.02456829883158207, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.022682104259729385, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.022044183686375618, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.01615484245121479, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.015467653051018715, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.014489446766674519, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.009529095143079758, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.031463708728551865, - "qparams": { - "group_size": 128, + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, "bits": [ + 6, 5 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.031463708728551865, - "qparams": { - "group_size": 128, + { + "accuracy": 0.9950752258300781, + "total_bits": 252975104, + "q_proj": { + "group_size": { + "6": 128 + }, "bits": [ - 5 + 6 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.24.self_attn.o_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.18141861259937286, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.16485795378684998, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.15551349520683289, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.12679263949394226, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.08552072197198868, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.07782601565122604, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.10407017171382904, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.0951523631811142, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.08625885844230652, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.06950195133686066, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.06358083337545395, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.053085193037986755, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.045619092881679535, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.041207388043403625, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.040127675980329514, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.026572396978735924, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.021476391702890396, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.020720025524497032, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.018308162689208984, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.017575856298208237, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.01400628313422203, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.01393310260027647, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.01249732170253992, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.009369940496981144, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.026572396978735924, - "qparams": { - "group_size": 128, + }, + "k_proj": { + "group_size": { + "6": 128 + }, "bits": [ - 5 + 6 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.026572396978735924, - "qparams": { - "group_size": 128, + }, + "v_proj": { + "group_size": { + "6": 128 + }, "bits": [ - 5 + 6 ], "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.24.mlp.gate_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.15149587392807007, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.14242038130760193, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.13912153244018555, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.12657910585403442, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.07148872315883636, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.06815068423748016, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.08009731769561768, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.07391893863677979, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.0721808522939682, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.0640324279665947, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.0612904317677021, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.040777262300252914, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.03538615256547928, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.03429427742958069, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.0340430811047554, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.020411206409335136, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.01773320510983467, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.017490984871983528, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.01622655987739563, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.016069550067186356, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.010814925655722618, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.01086176186800003, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.010452763177454472, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.007343269418925047, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.03538615256547928, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.03538615256547928, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.24.mlp.up_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.24008512496948242, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.22620579600334167, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.22151072323322296, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.20156946778297424, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.1133670061826706, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.10839330404996872, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.12658396363258362, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.11651138216257095, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.11441976577043533, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.10171761363744736, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.09736834466457367, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.06438954919576645, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.0557410903275013, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.054315805435180664, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.053981713950634, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03229152411222458, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.027884457260370255, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.027544157579541206, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.025508910417556763, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.025296533480286598, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.01713174767792225, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.016728347167372704, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.016668682917952538, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.010964182205498219, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03229152411222458, - "qparams": { - "group_size": 128, + }, + "o_proj": { + "group_size": { + "6": 128 + }, "bits": [ - 5 + 6 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03229152411222458, - "qparams": { - "group_size": 128, + { + "accuracy": 0.9943436980247498, + "total_bits": 265314304, + "q_proj": { + "group_size": { + "6": 32 + }, "bits": [ - 5 + 6 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.24.mlp.down_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1789376395089284, - "total_bits": 127947775.99999999, - "err": 0.21438007056713104, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 139482112.0, - "err": 0.18672119081020355, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 154162176.0, - "err": 0.17386312782764435, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7235804966517856, - "total_bits": 159929344.0, - "err": 0.15463514626026154, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2235804966517856, - "total_bits": 189289472.0, - "err": 0.09738100320100784, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7146519252232144, - "total_bits": 218125312.0, - "err": 0.08546118438243866, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 178001152.0, - "err": 0.12059411406517029, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 183522304.0, - "err": 0.10955529659986496, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1789376395089284, - "total_bits": 186668032.0, - "err": 0.10115152597427368, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5271519252232144, - "total_bits": 207115264.0, - "err": 0.08164488524198532, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6608973911830356, - "total_bits": 214968832.0, - "err": 0.07825355231761932, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 236721408.0, - "err": 0.06122550740838051, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 242242560.0, - "err": 0.05259086564183235, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.223580496651786, - "total_bits": 248009728.0, - "err": 0.0470375120639801, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.321794782366071, - "total_bits": 253776896.0, - "err": 0.04564155265688896, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.031093118712306023, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.223580496651786, - "total_bits": 306729984.0, - "err": 0.024913432076573372, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.339651925223214, - "total_bits": 313545728.0, - "err": 0.024235455319285393, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.527151925223214, - "total_bits": 324555776.0, - "err": 0.021884845569729805, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.723580496651786, - "total_bits": 336090112.0, - "err": 0.02098550833761692, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 354161920.0, - "err": 0.01720062829554081, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 359683072.0, - "err": 0.016748489812016487, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.227770124162946, - "total_bits": 365696256.0, - "err": 0.015410037711262703, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 477123584.0, - "err": 0.011842673644423485, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.031093118712306023, - "qparams": { - "group_size": 128, + }, + "k_proj": { + "group_size": { + "6": 32 + }, "bits": [ - 5 + 6 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.031093118712306023, - "qparams": { - "group_size": 128, + }, + "v_proj": { + "group_size": { + "8": 32 + }, "bits": [ - 5 + 8 ], "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.25.self_attn.q_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.12083269655704498, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.11122491210699081, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.10456757247447968, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.0948047861456871, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.05638789385557175, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.05116257071495056, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.07064754515886307, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.06428352743387222, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.05742909386754036, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.049840569496154785, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.0484088771045208, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.03596413880586624, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.030751438811421394, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.027166148647665977, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.026261504739522934, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.01799190044403076, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.01414154376834631, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.013530680909752846, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.012835155241191387, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.012266317382454872, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.009407500736415386, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.009274219162762165, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.008115128614008427, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.0061346981674432755, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.03596413880586624, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.03596413880586624, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.25.self_attn.k_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.10061205178499222, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.09191762655973434, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.08383604139089584, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.07620549201965332, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.0465373620390892, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.040687184780836105, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.061912450939416885, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.056646138429641724, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.047538310289382935, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.04108615964651108, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.040139470249414444, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.03135596960783005, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.027017952874302864, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.022409925237298012, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.021203214302659035, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.015641046687960625, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.011659393087029457, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.010858993045985699, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.010572121478617191, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.009795657359063625, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.008110364899039268, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.007923994213342667, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.006527118384838104, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.005010275170207024, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.03135596960783005, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.03135596960783005, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.25.self_attn.v_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.22187334299087524, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.20298805832862854, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.19419629871845245, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.1759341061115265, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.10324837267398834, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.09517738223075867, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.12379278242588043, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.11193736642599106, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.10527303069829941, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.09049289673566818, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.08715976774692535, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.0629558339715004, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.05339968949556351, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.04946443811058998, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.04850287735462189, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.03143388032913208, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.02521114982664585, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.024414140731096268, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.022572433575987816, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.021948236972093582, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.01612413302063942, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.015342083759605885, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.014466334134340286, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.009380790404975414, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.03143388032913208, - "qparams": { - "group_size": 128, + }, + "o_proj": { + "group_size": { + "6": 32 + }, "bits": [ - 5 + 6 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.03143388032913208, - "qparams": { - "group_size": 128, + { + "accuracy": 0.996357262134552, + "total_bits": 336861184, + "q_proj": { + "group_size": { + "8": 128 + }, "bits": [ - 5 + 8 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.25.self_attn.o_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.19035159051418304, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.16604214906692505, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.15777312219142914, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.1338513046503067, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.0887281745672226, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.07975544780492783, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.10290095210075378, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.09373565018177032, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.09074069559574127, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.07104899734258652, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.06402561813592911, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.05237478017807007, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.04494239017367363, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.04265962541103363, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.042096059769392014, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.026279956102371216, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.022065456956624985, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.021475667133927345, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.018512524664402008, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.018130771815776825, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.013989876955747604, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.013707311823964119, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.013200811110436916, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.009195813909173012, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.026279956102371216, - "qparams": { - "group_size": 128, + }, + "k_proj": { + "group_size": { + "8": 128 + }, "bits": [ - 5 + 8 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.026279956102371216, - "qparams": { - "group_size": 128, + }, + "v_proj": { + "group_size": { + "8": 128 + }, "bits": [ - 5 + 8 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.25.mlp.gate_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.15427188575267792, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.14494141936302185, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.14150899648666382, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.12876854836940765, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.07284121215343475, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.06939606368541718, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.0816570371389389, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.07535859942436218, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.0735633596777916, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.0652044340968132, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.06241200864315033, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.041626546531915665, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.03608658164739609, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.03495914489030838, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.034695833921432495, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.020852264016866684, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.018088020384311676, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.017835775390267372, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.01654999516904354, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.016380086541175842, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.011097531765699387, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.011097949929535389, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.010722949169576168, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.007524424232542515, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.03608658164739609, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.03608658164739609, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 ], "scale_bits": 4 } } - }, - { - "key": "model.layers.25.mlp.up_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.24130374193191528, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.22718219459056854, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.22242602705955505, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.20235639810562134, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.11403219401836395, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.10897491127252579, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.12667705118656158, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.11718305200338364, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.11508651077747345, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.10221415758132935, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.09765006601810455, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.06447529792785645, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.05601239949464798, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.054578885436058044, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.054244909435510635, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03217235952615738, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.027894651517271996, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.027541017159819603, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.02546965517103672, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.0252538975328207, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.016698969528079033, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.016556940972805023, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.01621807925403118, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.010605577379465103, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03217235952615738, - "qparams": { - "group_size": 128, + ], + "model.layers.31.mlp": [ + { + "accuracy": 0.9328120946884155, + "total_bits": 394740800, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, "bits": [ - 5 + 3, + 2 ], "bits_prop": [ - 1.0 + 0.05, + 0.95 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03217235952615738, - "qparams": { - "group_size": 128, + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, "bits": [ - 5 + 3, + 2 ], "bits_prop": [ - 1.0 + 0.05, + 0.95 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.25.mlp.down_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1789376395089284, - "total_bits": 127947775.99999999, - "err": 0.21246539056301117, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 139482112.0, - "err": 0.18458791077136993, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 154162176.0, - "err": 0.17083828151226044, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7235804966517856, - "total_bits": 159929344.0, - "err": 0.15251941978931427, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2235804966517856, - "total_bits": 189289472.0, - "err": 0.09620118886232376, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7146519252232144, - "total_bits": 218125312.0, - "err": 0.08378023654222488, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 178001152.0, - "err": 0.11991209536790848, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 183522304.0, - "err": 0.10975008457899094, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1789376395089284, - "total_bits": 186668032.0, - "err": 0.10016003251075745, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5271519252232144, - "total_bits": 207115264.0, - "err": 0.08094187825918198, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6608973911830356, - "total_bits": 214968832.0, - "err": 0.0776660144329071, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 236721408.0, - "err": 0.06113720312714577, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 242242560.0, - "err": 0.05256909132003784, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.223580496651786, - "total_bits": 248009728.0, - "err": 0.046408507972955704, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.321794782366071, - "total_bits": 253776896.0, - "err": 0.04484984278678894, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.030830642208456993, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.223580496651786, - "total_bits": 306729984.0, - "err": 0.024455910548567772, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.339651925223214, - "total_bits": 313545728.0, - "err": 0.023698896169662476, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.527151925223214, - "total_bits": 324555776.0, - "err": 0.021495819091796875, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.723580496651786, - "total_bits": 336090112.0, - "err": 0.020498687401413918, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 354161920.0, - "err": 0.0167385246604681, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 359683072.0, - "err": 0.01641503907740116, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.227770124162946, - "total_bits": 365696256.0, - "err": 0.014784152619540691, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 477123584.0, - "err": 0.011369687505066395, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.030830642208456993, - "qparams": { - "group_size": 128, + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, "bits": [ - 5 + 6, + 3, + 2 ], "bits_prop": [ - 1.0 + 0.05, + 0.2, + 0.75 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.030830642208456993, - "qparams": { - "group_size": 128, + { + "accuracy": 0.9346612691879272, + "total_bits": 408503360, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, "bits": [ - 5 + 3, + 2 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.26.self_attn.q_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.11376035958528519, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.10430456697940826, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.09726223349571228, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.08820700645446777, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.05307509005069733, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.04760260507464409, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.067514568567276, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.06148020178079605, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.05411755293607712, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.04683512821793556, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.04543595388531685, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.03440513834357262, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.02947251871228218, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.025608131662011147, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.02461964078247547, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.017204130068421364, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.013402056880295277, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.012747734785079956, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.012168016284704208, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.011552945710718632, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.008973565883934498, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.008965136483311653, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.007587672211229801, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.00600266270339489, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.03440513834357262, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.03440513834357262, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.26.self_attn.k_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.09622908383607864, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.08747828006744385, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.07848671823740005, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.07127632200717926, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.044312842190265656, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.03800457343459129, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.06118439882993698, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.0555715374648571, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.04539947211742401, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.03907454013824463, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.03840332105755806, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.030978647992014885, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.02647325210273266, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.02141893468797207, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.020059216767549515, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.01549567561596632, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.011206988245248795, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.010345504619181156, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.010166972875595093, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.009320410899817944, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.008094282820820808, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.00782595295459032, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.006349973380565643, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.005050992593169212, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.030978647992014885, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.030978647992014885, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 + "bits_prop": [ + 0.25, + 0.75 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.26.self_attn.v_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.2076054811477661, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.18725915253162384, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.1766294687986374, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.15968194603919983, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.09595026075839996, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.08649159222841263, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.1190316453576088, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.10699114948511124, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.09828411787748337, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.08319244533777237, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.08044670522212982, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.060789067298173904, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.05106038972735405, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.04597770422697067, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.04470814764499664, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.03040117397904396, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.023519648239016533, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.02254987508058548, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.020927025005221367, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.020082898437976837, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.015613900497555733, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.014719231054186821, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.013452219776809216, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.009048263542354107, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.03040117397904396, - "qparams": { - "group_size": 128, + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, "bits": [ - 5 + 6, + 3, + 2 ], "bits_prop": [ - 1.0 + 0.05, + 0.2, + 0.75 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.03040117397904396, - "qparams": { - "group_size": 128, + { + "accuracy": 0.9440197348594666, + "total_bits": 456551424, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, "bits": [ - 5 + 3, + 2 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.26.self_attn.o_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.17498326301574707, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.15672217309474945, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.15075790882110596, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.12977315485477448, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.08190862089395523, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.07517024874687195, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.09335885941982269, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.08590848743915558, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.08363350480794907, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.06815110892057419, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.06280434131622314, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.04772903770208359, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.04115067422389984, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.03934421017765999, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.038912829011678696, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.023868069052696228, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.02035491354763508, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.01982816308736801, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.01758740283548832, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.017299283295869827, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.01257244311273098, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.012565616518259048, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.011973617598414421, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.00843510776758194, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.023868069052696228, - "qparams": { - "group_size": 128, + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, "bits": [ - 5 + 3, + 2 ], "bits_prop": [ - 1.0 + 0.3, + 0.7 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.023868069052696228, - "qparams": { - "group_size": 128, + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, "bits": [ - 5 + 5, + 3 ], "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.26.mlp.gate_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.158526211977005, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.14883656799793243, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.14539273083209991, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.13224004209041595, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.07494238764047623, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.07138160616159439, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.08393613249063492, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.07751064002513885, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.07569188624620438, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.06704376637935638, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.0640961304306984, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.04275195673108101, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.03711431100964546, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.03594871237874031, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.03567543253302574, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.021371962502598763, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.01855326257646084, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.01829192042350769, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.01695694960653782, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.01678430289030075, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.011245785281062126, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.011324391700327396, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.010860862210392952, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.007594595197588205, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.03711431100964546, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.03711431100964546, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 + 0.05, + 0.95 ], "scale_bits": 4 } - } - }, - { - "key": "model.layers.26.mlp.up_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.23981992900371552, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.22573256492614746, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.2209373265504837, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.20095089077949524, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.11343277245759964, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.108416847884655, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.12635907530784607, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.11661016196012497, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.11448471248149872, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.10161041468381882, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.09711343050003052, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.06442397087812424, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.055776532739400864, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.054331667721271515, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.05399119108915329, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03219061717391014, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.027841666713356972, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.027491826564073563, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.025429006665945053, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.025213783606886864, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.01693989522755146, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.01663978397846222, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.016455598175525665, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.010804812423884869, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03219061717391014, - "qparams": { - "group_size": 128, + }, + { + "accuracy": 0.9470396637916565, + "total_bits": 512257024, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, "bits": [ - 5 + 3, + 2 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03219061717391014, - "qparams": { - "group_size": 128, + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, "bits": [ - 5 + 3, + 2 ], "bits_prop": [ - 1.0 + 0.3, + 0.7 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.26.mlp.down_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1789376395089284, - "total_bits": 127947775.99999999, - "err": 0.21588432788848877, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 139482112.0, - "err": 0.1888512372970581, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 154162176.0, - "err": 0.17486140131950378, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7235804966517856, - "total_bits": 159929344.0, - "err": 0.15597616136074066, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2235804966517856, - "total_bits": 189289472.0, - "err": 0.09810573607683182, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7146519252232144, - "total_bits": 218125312.0, - "err": 0.08580294996500015, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 178001152.0, - "err": 0.12272262573242188, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 183522304.0, - "err": 0.11227893829345703, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1789376395089284, - "total_bits": 186668032.0, - "err": 0.10196498036384583, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5271519252232144, - "total_bits": 207115264.0, - "err": 0.0830080509185791, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6608973911830356, - "total_bits": 214968832.0, - "err": 0.07951413094997406, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 236721408.0, - "err": 0.06260396540164948, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 242242560.0, - "err": 0.05396845191717148, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.223580496651786, - "total_bits": 248009728.0, - "err": 0.04752375930547714, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.321794782366071, - "total_bits": 253776896.0, - "err": 0.04590211063623428, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.03182630240917206, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.223580496651786, - "total_bits": 306729984.0, - "err": 0.025388458743691444, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.339651925223214, - "total_bits": 313545728.0, - "err": 0.02458152547478676, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.527151925223214, - "total_bits": 324555776.0, - "err": 0.022514445707201958, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.723580496651786, - "total_bits": 336090112.0, - "err": 0.021475939080119133, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 354161920.0, - "err": 0.01767595112323761, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 359683072.0, - "err": 0.017443466931581497, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.227770124162946, - "total_bits": 365696256.0, - "err": 0.01567509025335312, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 477123584.0, - "err": 0.01252960879355669, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.03182630240917206, - "qparams": { - "group_size": 128, + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, "bits": [ - 5 + 5, + 4 ], "bits_prop": [ - 1.0 + 0.05, + 0.95 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.03182630240917206, - "qparams": { - "group_size": 128, + { + "accuracy": 0.9662161469459534, + "total_bits": 577676384, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, "bits": [ - 5 + 4, + 3 ], "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.27.self_attn.q_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.11917426437139511, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.10940490663051605, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.10314266383647919, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.09364146739244461, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.05561661347746849, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.050555672496557236, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.06899715214967728, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.06283077597618103, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.056686483323574066, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.04909481480717659, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.04750063270330429, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.03516862541437149, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.030071767047047615, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.026802631095051765, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.025986557826399803, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.017624564468860626, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.013978340663015842, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.013409369625151157, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.012679324485361576, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.012157154269516468, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.009255347773432732, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.009149945341050625, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.008072727359831333, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.006109146401286125, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.03516862541437149, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.03516862541437149, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.27.self_attn.k_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.09884216636419296, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.09085135906934738, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.08489948511123657, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.07695894688367844, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.046029601246118546, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.041443850845098495, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.05849110335111618, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.053236186504364014, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.046913765370845795, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.04067007824778557, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.03941033035516739, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.029592756181955338, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.0254652202129364, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.022167909890413284, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.02133341133594513, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.014832265675067902, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.01153840497136116, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.010963181965053082, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.01047547534108162, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.009930609725415707, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.007755618076771498, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.007628869265317917, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.006594547536224127, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.004965579137206078, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.029592756181955338, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.029592756181955338, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.27.self_attn.v_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.20588931441307068, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.18853314220905304, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.1804577261209488, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.16334965825080872, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.09599922597408295, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.08842575550079346, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.11369052529335022, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.10389699786901474, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.09776528924703598, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.08417247980833054, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.08076203614473343, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.05785753205418587, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.049605414271354675, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.045971330255270004, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.045095257461071014, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.02887023612856865, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.023406142368912697, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.02267291396856308, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.02095096930861473, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.02037627249956131, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.014792446978390217, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.014227847568690777, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.013416843488812447, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.00865805335342884, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.02887023612856865, - "qparams": { - "group_size": 128, + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, "bits": [ - 5 + 8, + 4, + 3 ], "bits_prop": [ - 1.0 + 0.05, + 0.1, + 0.85 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.02887023612856865, - "qparams": { - "group_size": 128, + { + "accuracy": 0.9690964818000793, + "total_bits": 592151552, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, "bits": [ - 5 + 4, + 3 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.27.self_attn.o_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.17772440612316132, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.15704509615898132, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.15038473904132843, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.12558379769325256, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.08390217274427414, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.0767737552523613, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.0953947976231575, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.08723300695419312, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.08501718938350677, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.06598974019289017, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.06129085645079613, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.04905915632843971, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.04204940050840378, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.040547676384449005, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.040181081742048264, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.024730972945690155, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.021397791802883148, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.020993486046791077, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.017916398122906685, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.017670776695013046, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.01355475839227438, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.013755383901298046, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.013038311153650284, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.009993331506848335, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.024730972945690155, - "qparams": { - "group_size": 128, + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, "bits": [ - 5 + 4, + 3 ], "bits_prop": [ - 1.0 + 0.25, + 0.75 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.024730972945690155, - "qparams": { - "group_size": 128, + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, "bits": [ - 5 + 8, + 4, + 3 ], "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.27.mlp.gate_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.15486136078834534, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.14521923661231995, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.141667440533638, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.12874498963356018, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.07319999486207962, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.0696222186088562, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.0822620615363121, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.07593046128749847, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.07399879395961761, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.06542466580867767, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.06256306171417236, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.04198095202445984, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.036405760794878006, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.03517794981598854, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.03488249331712723, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.020996883511543274, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.018221527338027954, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.017958391457796097, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.016658250242471695, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.016474977135658264, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.011122742667794228, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.011236988939344883, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.01071261428296566, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.007651667110621929, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.036405760794878006, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.036405760794878006, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 + 0.05, + 0.1, + 0.85 ], "scale_bits": 4 } - } - }, - { - "key": "model.layers.27.mlp.up_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.22915945947170258, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.215534046292305, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.2108297497034073, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.1917043924331665, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.10841603577136993, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.10349181294441223, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.12081869691610336, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.11162058264017105, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.10945924371480942, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.09704148769378662, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.0927901566028595, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.0616275891661644, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.0533965602517128, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.05193789303302765, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.0516032874584198, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.030799727886915207, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.026619551703333855, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.02627074345946312, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.024289628490805626, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.02407069317996502, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.016166631132364273, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.015917573124170303, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.01567879691720009, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.010338026098906994, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.030799727886915207, - "qparams": { - "group_size": 128, + }, + { + "accuracy": 0.9729781746864319, + "total_bits": 636733536, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, "bits": [ - 5 + 4, + 3 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.030799727886915207, - "qparams": { - "group_size": 128, + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, "bits": [ - 5 + 4, + 3 ], "bits_prop": [ - 1.0 + 0.25, + 0.75 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.27.mlp.down_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1789376395089284, - "total_bits": 127947775.99999999, - "err": 0.21163129806518555, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 139482112.0, - "err": 0.18413317203521729, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 154162176.0, - "err": 0.16933736205101013, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7235804966517856, - "total_bits": 159929344.0, - "err": 0.15067481994628906, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2235804966517856, - "total_bits": 189289472.0, - "err": 0.09607583284378052, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7146519252232144, - "total_bits": 218125312.0, - "err": 0.08322792500257492, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 178001152.0, - "err": 0.1230873242020607, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 183522304.0, - "err": 0.11128493398427963, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1789376395089284, - "total_bits": 186668032.0, - "err": 0.10005010664463043, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5271519252232144, - "total_bits": 207115264.0, - "err": 0.08091255277395248, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6608973911830356, - "total_bits": 214968832.0, - "err": 0.0776929035782814, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 236721408.0, - "err": 0.0624680295586586, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 242242560.0, - "err": 0.053525812923908234, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.223580496651786, - "total_bits": 248009728.0, - "err": 0.04665815085172653, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.321794782366071, - "total_bits": 253776896.0, - "err": 0.04489182308316231, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.0316300094127655, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.223580496651786, - "total_bits": 306729984.0, - "err": 0.02504798024892807, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.339651925223214, - "total_bits": 313545728.0, - "err": 0.024186599999666214, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.527151925223214, - "total_bits": 324555776.0, - "err": 0.02217945083975792, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.723580496651786, - "total_bits": 336090112.0, - "err": 0.02107107639312744, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 354161920.0, - "err": 0.01740931160748005, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 359683072.0, - "err": 0.017444835975766182, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.227770124162946, - "total_bits": 365696256.0, - "err": 0.015159836038947105, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 477123584.0, - "err": 0.012647896073758602, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.0316300094127655, - "qparams": { - "group_size": 128, + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, "bits": [ - 5 + 8, + 4 ], "bits_prop": [ - 1.0 + 0.05, + 0.95 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.0316300094127655, - "qparams": { - "group_size": 128, + { + "accuracy": 0.9818095564842224, + "total_bits": 728020576, + "gate_proj": { + "group_size": { + "4": 128 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.28.self_attn.q_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.10846355557441711, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.0994943305850029, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.09172973036766052, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.0833696648478508, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.050570741295814514, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.044826019555330276, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.06659777462482452, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.06018847972154617, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.051611367613077164, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.044709838926792145, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.04374537989497185, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.03394603729248047, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.028800688683986664, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.024455951526761055, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.023325424641370773, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.017018089070916176, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.012871318496763706, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.012138854712247849, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.011717972345650196, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.011009959504008293, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.00892404094338417, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.008807530626654625, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.0073334695771336555, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.0059243845753371716, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.03394603729248047, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.03394603729248047, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.28.self_attn.k_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.09008632600307465, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.08174831420183182, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.07164165377616882, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.06524325162172318, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.041462622582912445, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.03457123413681984, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.06000978872179985, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.05420265346765518, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.04253677278757095, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.036693163216114044, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.03637371212244034, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.030397048220038414, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.025883128866553307, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.02007337659597397, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.01845550537109375, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.015237156301736832, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.01059607695788145, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.00959065929055214, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.009664032608270645, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.00864709448069334, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.007932835258543491, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.00767711503431201, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.005907338112592697, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.00494297593832016, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.036693163216114044, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.036693163216114044, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 + "bits_prop": [ + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.28.self_attn.v_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.20476500689983368, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.18430912494659424, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.17292487621307373, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.15615229308605194, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.09460007399320602, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.08471246808767319, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.11933553218841553, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.1071326732635498, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.09698750078678131, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.08198486268520355, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.07934626936912537, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.06109913811087608, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.05116520822048187, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.04547077789902687, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.04400032013654709, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.03063930571079254, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.02343808300793171, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.022370105609297752, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.020913682878017426, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.019970305263996124, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.015775857493281364, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.015048469416797161, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.013364813290536404, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.009549430571496487, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.03063930571079254, - "qparams": { - "group_size": 128, + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, "bits": [ - 5 + 8, + 4 ], "bits_prop": [ - 1.0 + 0.05, + 0.95 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.03063930571079254, - "qparams": { - "group_size": 128, + { + "accuracy": 0.9837337732315063, + "total_bits": 738755584, + "gate_proj": { + "group_size": { + "4": 32 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.28.self_attn.o_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.20117461681365967, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.17750106751918793, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.16961735486984253, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.14170241355895996, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.09416309744119644, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.08513269573450089, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.10749728232622147, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.09834583103656769, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.09580652415752411, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.07478103786706924, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.0682695284485817, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.05511130020022392, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.047206055372953415, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.04529087245464325, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.04483683407306671, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.027577517554163933, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.02363193966448307, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.023123763501644135, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.019812026992440224, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.01948498748242855, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.01472675334662199, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.014892439357936382, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.014100172556936741, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.010395558550953865, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.027577517554163933, - "qparams": { - "group_size": 128, + }, + "up_proj": { + "group_size": { + "4": 32 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.027577517554163933, - "qparams": { - "group_size": 128, + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, "bits": [ - 5 + 8, + 4 ], "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.28.mlp.gate_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.14349111914634705, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.13443629443645477, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.13088840246200562, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.11890491098165512, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.06783320009708405, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.06436826288700104, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.07661287486553192, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.07071147114038467, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.06860414892435074, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.06054321303963661, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.057908326387405396, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.039061568677425385, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.03389964997768402, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.03260255604982376, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.03230258822441101, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.01954074762761593, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.01690012216567993, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.016632666811347008, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.015436218120157719, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.015244835987687111, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.010319320484995842, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.010460889898240566, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.009887807071208954, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.00712869456037879, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.03389964997768402, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.03389964997768402, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 + 0.05, + 0.95 ], "scale_bits": 4 } - } - }, - { - "key": "model.layers.28.mlp.up_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.21158021688461304, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.19872893393039703, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.1941630095243454, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.1764296293258667, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.1001596674323082, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.09544315934181213, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.11209852248430252, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.10348561406135559, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.10115627944469452, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.08952431380748749, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.08556748926639557, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.05721382796764374, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.049566831439733505, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.048071496188640594, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.0477195605635643, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.028632663190364838, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.024809930473566055, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.024467576295137405, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.02266097627580166, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.022432547062635422, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.015147864818572998, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.01512912567704916, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.014642241410911083, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.010146615095436573, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.028632663190364838, - "qparams": { - "group_size": 128, + }, + { + "accuracy": 0.9825939536094666, + "total_bits": 750822496, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, "bits": [ - 5 + 5, + 4 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.028632663190364838, - "qparams": { - "group_size": 128, + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, "bits": [ - 5 + 5, + 4 ], "bits_prop": [ - 1.0 + 0.25, + 0.75 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.28.mlp.down_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1789376395089284, - "total_bits": 127947775.99999999, - "err": 0.1969417780637741, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 139482112.0, - "err": 0.17052705585956573, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 154162176.0, - "err": 0.1561114639043808, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7235804966517856, - "total_bits": 159929344.0, - "err": 0.1380072832107544, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2235804966517856, - "total_bits": 189289472.0, - "err": 0.08959715813398361, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7146519252232144, - "total_bits": 218125312.0, - "err": 0.07703614979982376, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 178001152.0, - "err": 0.11475022882223129, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 183522304.0, - "err": 0.10431656986474991, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1789376395089284, - "total_bits": 186668032.0, - "err": 0.09331539273262024, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5271519252232144, - "total_bits": 207115264.0, - "err": 0.07475513219833374, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6608973911830356, - "total_bits": 214968832.0, - "err": 0.07163295149803162, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 236721408.0, - "err": 0.05872416868805885, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 242242560.0, - "err": 0.05035082623362541, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.223580496651786, - "total_bits": 248009728.0, - "err": 0.043656617403030396, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.321794782366071, - "total_bits": 253776896.0, - "err": 0.04195272922515869, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.030031580477952957, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.223580496651786, - "total_bits": 306729984.0, - "err": 0.023713430389761925, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.339651925223214, - "total_bits": 313545728.0, - "err": 0.022863896563649178, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.527151925223214, - "total_bits": 324555776.0, - "err": 0.0209564920514822, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.723580496651786, - "total_bits": 336090112.0, - "err": 0.019882608205080032, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 354161920.0, - "err": 0.017007000744342804, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 359683072.0, - "err": 0.016840757802128792, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.227770124162946, - "total_bits": 365696256.0, - "err": 0.014905511401593685, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 477123584.0, - "err": 0.012504907324910164, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.030031580477952957, - "qparams": { - "group_size": 128, + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, "bits": [ - 5 + 8, + 5, + 4 ], "bits_prop": [ - 1.0 + 0.05, + 0.1, + 0.85 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.030031580477952957, - "qparams": { - "group_size": 128, + { + "accuracy": 0.9848273396492004, + "total_bits": 765297664, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, "bits": [ - 5 + 5, + 4 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.29.self_attn.q_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.10690061002969742, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.09815768152475357, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.09078006446361542, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.0825648233294487, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.04968932643532753, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.044237203896045685, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.06577616930007935, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.058795370161533356, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.050727710127830505, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.04405030980706215, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.04312605783343315, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.033581119030714035, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.028123414143919945, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.023980997502803802, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.02292216569185257, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.016823867335915565, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.01257700938731432, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.011883282102644444, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.01145853940397501, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.010782822035253048, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.008814052678644657, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.008520789444446564, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.007226846180856228, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.005659508518874645, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.033581119030714035, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.033581119030714035, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.29.self_attn.k_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.08952047675848007, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.08250603824853897, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.07497931271791458, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.06816376745700836, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.04143272712826729, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.03625224903225899, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.056677695363759995, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.05112679675221443, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.04229027405381203, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.03695809096097946, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.03626757860183716, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.028844524174928665, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.02436959184706211, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.01998511515557766, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.01881789043545723, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.014426960609853268, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.010419209487736225, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.009669199585914612, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.009525767527520657, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.008776616305112839, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.007505323737859726, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.007166166789829731, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.005893015302717686, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.004537094384431839, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.03695809096097946, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.03695809096097946, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 + "bits_prop": [ + 0.25, + 0.75 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.29.self_attn.v_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.22343318164348602, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.20583198964595795, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.1970372349023819, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.17883554100990295, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.10455653071403503, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.09663528949022293, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.1252603977918625, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.11359316110610962, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.10641433298587799, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.09235928952693939, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.08901331573724747, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.06397916376590729, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.054314758628606796, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.050141558051109314, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.04913342371582985, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.03200586885213852, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.02567833848297596, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.024901505559682846, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.023173008114099503, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.022507593035697937, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.016481203958392143, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.015849148854613304, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.014732515439391136, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.009946608915925026, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.03200586885213852, - "qparams": { - "group_size": 128, + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, "bits": [ - 5 + 8, + 5, + 4 ], "bits_prop": [ - 1.0 + 0.05, + 0.1, + 0.85 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.03200586885213852, - "qparams": { - "group_size": 128, + { + "accuracy": 0.9908666610717773, + "total_bits": 923968608, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, "bits": [ + 6, 5 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.29.self_attn.o_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.18134064972400665, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.15929216146469116, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.1519651859998703, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.12979629635810852, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.08495883643627167, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.07680557668209076, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.09868109226226807, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.08883953094482422, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.08660052716732025, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.06747069954872131, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.06320880353450775, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.050655968487262726, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.04272669553756714, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.04094654694199562, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.04052916541695595, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.025376873090863228, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.021435245871543884, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.020939169451594353, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.01798781380057335, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.01769905723631382, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.0135725736618042, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.01360308937728405, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.012966052629053593, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.009608258493244648, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.025376873090863228, - "qparams": { - "group_size": 128, + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, "bits": [ + 6, 5 ], "bits_prop": [ - 1.0 + 0.25, + 0.75 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.025376873090863228, - "qparams": { - "group_size": 128, + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, "bits": [ + 8, + 6, 5 ], "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.29.mlp.gate_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.14316187798976898, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.1339927315711975, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.13035334646701813, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.11837691813707352, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.06766904890537262, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.0641142725944519, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.07660317420959473, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.07066255807876587, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.06845089793205261, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.06031977757811546, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.05773473158478737, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.039120301604270935, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.03392711281776428, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.03257099166512489, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.03225318342447281, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.019586089998483658, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.016988974064588547, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.016711659729480743, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.015529374592006207, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.015325463376939297, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.01042622048407793, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.010677981190383434, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.009972340427339077, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.007455783896148205, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.03392711281776428, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.03392711281776428, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 + 0.05, + 0.1, + 0.85 ], "scale_bits": 4 } - } - }, - { - "key": "model.layers.29.mlp.up_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.19063128530979156, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.1788778007030487, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.17447900772094727, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.1586374044418335, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.09084618091583252, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.08643306791782379, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.10197679698467255, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.09406305104494095, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.09178650379180908, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.08120432496070862, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.07779013365507126, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.052632078528404236, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.04590696468949318, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.04445236176252365, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.04412015527486801, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.026508206501603127, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.024417225271463394, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.02411825954914093, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.022642530500888824, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.02244783192873001, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.014941207133233547, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.01685900241136551, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.01449095644056797, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.013490079902112484, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.026508206501603127, - "qparams": { - "group_size": 128, + }, + { + "accuracy": 0.9915027618408203, + "total_bits": 938443776, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, "bits": [ + 6, 5 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.026508206501603127, - "qparams": { - "group_size": 128, + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, "bits": [ + 6, 5 ], "bits_prop": [ - 1.0 + 0.25, + 0.75 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.29.mlp.down_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1789376395089284, - "total_bits": 127947775.99999999, - "err": 0.20269359648227692, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 139482112.0, - "err": 0.17690929770469666, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 154162176.0, - "err": 0.1641721874475479, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7235804966517856, - "total_bits": 159929344.0, - "err": 0.14364421367645264, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2235804966517856, - "total_bits": 189289472.0, - "err": 0.09235961735248566, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7146519252232144, - "total_bits": 218125312.0, - "err": 0.08091681450605392, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 178001152.0, - "err": 0.11539975553750992, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 183522304.0, - "err": 0.10483922809362411, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1789376395089284, - "total_bits": 186668032.0, - "err": 0.0958005040884018, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5271519252232144, - "total_bits": 207115264.0, - "err": 0.07705017179250717, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6608973911830356, - "total_bits": 214968832.0, - "err": 0.07277034223079681, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 236721408.0, - "err": 0.058502696454524994, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 242242560.0, - "err": 0.05042333900928497, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.223580496651786, - "total_bits": 248009728.0, - "err": 0.044773731380701065, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.321794782366071, - "total_bits": 253776896.0, - "err": 0.04334237053990364, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.029434259980916977, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.223580496651786, - "total_bits": 306729984.0, - "err": 0.02398800663650036, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.339651925223214, - "total_bits": 313545728.0, - "err": 0.02328638546168804, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.527151925223214, - "total_bits": 324555776.0, - "err": 0.021100766956806183, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.723580496651786, - "total_bits": 336090112.0, - "err": 0.020189059898257256, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 354161920.0, - "err": 0.015932833775877953, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 359683072.0, - "err": 0.016506826505064964, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.227770124162946, - "total_bits": 365696256.0, - "err": 0.014096222817897797, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 477123584.0, - "err": 0.012004214338958263, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.029434259980916977, - "qparams": { - "group_size": 128, + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, "bits": [ + 8, + 6, 5 ], "bits_prop": [ - 1.0 + 0.05, + 0.1, + 0.85 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.029434259980916977, - "qparams": { - "group_size": 128, + { + "accuracy": 0.9941468238830566, + "total_bits": 1068803168, + "gate_proj": { + "group_size": { + "6": 128 + }, "bits": [ - 5 + 6 ], "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.30.self_attn.q_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.10471411049365997, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.09576410055160522, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.0870123952627182, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.07887396961450577, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.04871717840433121, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.04245203733444214, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.06626719981431961, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.05969509109854698, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.049786053597927094, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.04296760633587837, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.04230266436934471, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.03378073498606682, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.02864416502416134, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.02361886203289032, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.02229028195142746, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.016947336494922638, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.01250664796680212, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.011663155630230904, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.01138290110975504, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.010554751381278038, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.008889016695320606, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.008782408200204372, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.007061461918056011, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.005944917909801006, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.03378073498606682, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.03378073498606682, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.30.self_attn.k_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.09003240615129471, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.0817970484495163, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.07096024602651596, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.06455891579389572, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.04136183485388756, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.03410092741250992, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.06110008433461189, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.05508855730295181, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.042471762746572495, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.03666258230805397, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.03640655055642128, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.031059157103300095, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.02630346454679966, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.020061004906892776, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.018304666504263878, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.015588829293847084, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.010599716566503048, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.009507249109447002, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.009686173871159554, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.008568458259105682, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.008089637383818626, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.007788481656461954, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.005844669416546822, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.004978443030267954, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.03666258230805397, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.03666258230805397, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.30.self_attn.v_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.19441334903240204, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.17401346564292908, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.16170772910118103, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.1461573839187622, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.08949874341487885, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.0791262611746788, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.11597926914691925, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.10349378734827042, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.09196362644433975, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.07746374607086182, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.07553005963563919, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.05948936566710472, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.049454689025878906, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.04306807368993759, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.04143938422203064, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.02991427481174469, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.02230706624686718, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.02114938013255596, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.019869402050971985, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.018832532688975334, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.015476986765861511, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.014625412411987782, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.012689486145973206, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.009386420249938965, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.02991427481174469, - "qparams": { - "group_size": 128, + }, + "up_proj": { + "group_size": { + "6": 128 + }, "bits": [ - 5 + 6 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.02991427481174469, - "qparams": { - "group_size": 128, + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, "bits": [ - 5 + 8, + 6 ], "bits_prop": [ - 1.0 + 0.05, + 0.95 ], "scale_bits": 4 } - } - }, - { - "key": "model.layers.30.self_attn.o_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.1667550802230835, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.14641304314136505, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.1401718556880951, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.12027084827423096, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.07770568877458572, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.07060690224170685, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.0886828824877739, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.08087179064750671, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.0791531354188919, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.06224555894732475, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.0581745021045208, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.0454416424036026, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.03903960809111595, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.0376417450606823, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.03730753809213638, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.022888880223035812, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.020149163901805878, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.01974339224398136, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.017217885702848434, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.016997849568724632, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.012588417157530785, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.013311521150171757, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.012149696238338947, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.010069030337035656, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.03730753809213638, - "qparams": { - "group_size": 32, + }, + { + "accuracy": 0.9946669340133667, + "total_bits": 1109664000, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, "bits": [ - 6, - 4 + 8, + 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.03730753809213638, - "qparams": { - "group_size": 32, + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, "bits": [ - 6, - 4 + 8, + 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.30.mlp.gate_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.1414370834827423, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.13248448073863983, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.12903602421283722, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.11729948967695236, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.06734663993120193, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.06389883160591125, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.07590622454881668, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.07006675750017166, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.06805670261383057, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.060122326016426086, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.05764613300561905, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.03918704017996788, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.034221090376377106, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.03299294039607048, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.03269929811358452, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.01977364718914032, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.018125304952263832, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.017872020602226257, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.016795538365840912, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.016624247655272484, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.011129512451589108, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.012550463899970055, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.010744123719632626, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.010026657022535801, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.034221090376377106, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.034221090376377106, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.30.mlp.up_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.15379269421100616, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.144292950630188, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.1408042460680008, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.12792177498340607, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.07325790077447891, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.06966042518615723, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.08220001310110092, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.07581069320440292, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.0739823430776596, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.06541101634502411, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.06265615671873093, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.04242462292313576, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.03686835989356041, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.035705920308828354, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.03542030602693558, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.021356161683797836, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.0193965844810009, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.01914285309612751, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.01792731136083603, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.017765268683433533, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.011935783550143242, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.013146191835403442, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.011563225649297237, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.010310317389667034, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.03686835989356041, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.03686835989356041, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.30.mlp.down_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1789376395089284, - "total_bits": 127947775.99999999, - "err": 0.19682368636131287, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 139482112.0, - "err": 0.17287999391555786, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 154162176.0, - "err": 0.16219832003116608, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7235804966517856, - "total_bits": 159929344.0, - "err": 0.14039038121700287, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2235804966517856, - "total_bits": 189289472.0, - "err": 0.09055659919977188, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7146519252232144, - "total_bits": 218125312.0, - "err": 0.0806942731142044, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 178001152.0, - "err": 0.11212372779846191, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 183522304.0, - "err": 0.10019534826278687, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1789376395089284, - "total_bits": 186668032.0, - "err": 0.09329470992088318, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5271519252232144, - "total_bits": 207115264.0, - "err": 0.07519035786390305, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6608973911830356, - "total_bits": 214968832.0, - "err": 0.0708611011505127, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 236721408.0, - "err": 0.05732057988643646, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 242242560.0, - "err": 0.04889031499624252, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.223580496651786, - "total_bits": 248009728.0, - "err": 0.04450692981481552, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.321794782366071, - "total_bits": 253776896.0, - "err": 0.04342574626207352, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.029533715918660164, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.223580496651786, - "total_bits": 306729984.0, - "err": 0.024798739701509476, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.339651925223214, - "total_bits": 313545728.0, - "err": 0.024283137172460556, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.527151925223214, - "total_bits": 324555776.0, - "err": 0.02204105257987976, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.723580496651786, - "total_bits": 336090112.0, - "err": 0.021382218226790428, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 354161920.0, - "err": 0.017312675714492798, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 359683072.0, - "err": 0.017884761095046997, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.227770124162946, - "total_bits": 365696256.0, - "err": 0.01598913036286831, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 477123584.0, - "err": 0.014234540984034538, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.029533715918660164, - "qparams": { - "group_size": 128, + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, "bits": [ - 5 + 8, + 6 ], "bits_prop": [ - 1.0 + 0.15, + 0.85 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.029533715918660164, - "qparams": { - "group_size": 128, + { + "accuracy": 0.9952515363693237, + "total_bits": 1209278720, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, "bits": [ - 5 + 8, + 6 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.31.self_attn.q_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.1030498668551445, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.09370315074920654, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.08517935872077942, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.077263742685318, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.047722723335027695, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.041544247418642044, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.06482753902673721, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.05839618667960167, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.04899658262729645, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.042012955993413925, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.041289426386356354, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.033091045916080475, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.027960343286395073, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.023122861981391907, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.021844832226634026, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.016577769070863724, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.012234787456691265, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.011428850702941418, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.011122318916022778, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.010325808078050613, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.008751476183533669, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.00857359729707241, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.007010499015450478, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.0057992227375507355, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.033091045916080475, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.033091045916080475, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.31.self_attn.k_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.09095471352338791, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.0824134573340416, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.07192391902208328, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.06537044048309326, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.04178288206458092, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.034705061465501785, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.061865631490945816, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.055056024342775345, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.042939212173223495, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.036897167563438416, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.03670340031385422, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.03136592358350754, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.026228079572319984, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.020255692303180695, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.018579624593257904, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.01571427658200264, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.01070790458470583, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.009691311046481133, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.009754559025168419, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.008721022866666317, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.008248085156083107, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.007798638194799423, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.006084908731281757, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.005077349953353405, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.036897167563438416, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.03670340031385422, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 + "bits_prop": [ + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.31.self_attn.v_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.2058115154504776, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.18189965188503265, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.16703377664089203, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.15109434723854065, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.09441646933555603, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.0821528285741806, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.12298566848039627, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.11089914292097092, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.0975913479924202, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.08096430450677872, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.0787675529718399, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.06308306753635406, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.05301287770271301, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.04547103866934776, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.04349803924560547, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.031639546155929565, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.023513736203312874, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.022129720076918602, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.020808011293411255, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.019524376839399338, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.0162960272282362, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.015534842386841774, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.013279604725539684, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.00981434527784586, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.031639546155929565, - "qparams": { - "group_size": 128, + }, + "down_proj": { + "group_size": { + "8": 128 + }, "bits": [ - 5 + 8 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.031639546155929565, - "qparams": { - "group_size": 128, + { + "accuracy": 0.9963817596435547, + "total_bits": 1414799616, + "gate_proj": { + "group_size": { + "8": 128 + }, "bits": [ - 5 + 8 ], "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.31.self_attn.o_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.08833324909210205, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.0805683583021164, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.07789880782365799, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.06651676446199417, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.04036456719040871, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.037713173776865005, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.04671837389469147, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.04163821414113045, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.04087241739034653, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.03389901667833328, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.031136680394411087, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.023686563596129417, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.02119663916528225, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.020602118223905563, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.020464323461055756, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.01246565766632557, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.012535763904452324, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.012406978756189346, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.011517750099301338, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.011454272083938122, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.0076867216266691685, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.009846127592027187, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.007527696434408426, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.008778665214776993, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.03389901667833328, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.03389901667833328, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.31.mlp.gate_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.1436934620141983, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.1350702941417694, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.13187044858932495, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.11970429867506027, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.06803950667381287, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.06480888277292252, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.07639555633068085, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.07044854015111923, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.06868346035480499, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.06074245646595955, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.058089256286621094, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.03897850960493088, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.03375409543514252, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.03267723321914673, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.03241344168782234, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.01948518492281437, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.01688072830438614, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.016636261716485023, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.015402579680085182, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.015240008011460304, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.010237137787044048, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.010334931313991547, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.009870627894997597, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.006958858110010624, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.03375409543514252, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.03375409543514252, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.31.mlp.up_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.11118921637535095, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.10437948256731033, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.10183648020029068, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.09236431866884232, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.05264255404472351, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.05010106787085533, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.05914977937936783, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.0544707365334034, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.05315946415066719, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.04693306237459183, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.044892627745866776, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.030292609706521034, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.026199154555797577, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.02537982352077961, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.025183523073792458, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.01519948709756136, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.013342220336198807, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.013156912289559841, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.01221916452050209, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.012102936394512653, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.008241337724030018, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.00849149189889431, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.007969828322529793, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.0061171348206698895, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.030292609706521034, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.030292609706521034, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 + "bits_prop": [ + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.31.mlp.down_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1789376395089284, - "total_bits": 127947775.99999999, - "err": 0.14833177626132965, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 139482112.0, - "err": 0.13178956508636475, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 154162176.0, - "err": 0.12375067174434662, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7235804966517856, - "total_bits": 159929344.0, - "err": 0.10751177370548248, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2235804966517856, - "total_bits": 189289472.0, - "err": 0.06874558329582214, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7146519252232144, - "total_bits": 218125312.0, - "err": 0.06162426993250847, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 178001152.0, - "err": 0.08662161231040955, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 183522304.0, - "err": 0.07633817195892334, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1789376395089284, - "total_bits": 186668032.0, - "err": 0.07050946354866028, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5271519252232144, - "total_bits": 207115264.0, - "err": 0.05782431364059448, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6608973911830356, - "total_bits": 214968832.0, - "err": 0.05486736819148064, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 236721408.0, - "err": 0.04447155445814133, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 242242560.0, - "err": 0.03752622753381729, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.223580496651786, - "total_bits": 248009728.0, - "err": 0.03398587182164192, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.321794782366071, - "total_bits": 253776896.0, - "err": 0.033127736300230026, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.022967394441366196, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.223580496651786, - "total_bits": 306729984.0, - "err": 0.01928587816655636, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.339651925223214, - "total_bits": 313545728.0, - "err": 0.01886618137359619, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.527151925223214, - "total_bits": 324555776.0, - "err": 0.01737712323665619, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.723580496651786, - "total_bits": 336090112.0, - "err": 0.01687389239668846, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 354161920.0, - "err": 0.013565979897975922, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 359683072.0, - "err": 0.01426903810352087, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.227770124162946, - "total_bits": 365696256.0, - "err": 0.012454262003302574, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 477123584.0, - "err": 0.011658101342618465, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.223580496651786, - "total_bits": 248009728.0, - "err": 0.03398587182164192, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.223580496651786, - "total_bits": 248009728.0, - "err": 0.03398587182164192, - "qparams": { - "group_size": 32, + }, + "down_proj": { + "group_size": { + "8": 128 + }, "bits": [ - 5, - 4 + 8 ], "bits_prop": [ - 0.1, - 0.9 + 1 ], "scale_bits": 4 } } - } - ], - "last_module_idx": 66, - "base_perplexity": 11.133852362390622 + ], + "model.norm.norm": null, + "lm_head.linear": null + }, + "last_module_idx": 66 } \ No newline at end of file