{ "measurement": { "model.layers.0.self_attn": [ { "accuracy": 0.846385340157308, "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.8670843541622162, "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8880218887015393, "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9219632529114422, "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9248139297491625, "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9298182245539992, "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9417410312514556, "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9482899961110792, "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9536740066189515, "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9559611572246802, "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9637513543037992, "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9664112066752032, "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.967982933611462, "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9711626785758295, "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.981876133990131, "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9840561123564839, "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9849492262950853, "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9922585192038432, "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9959459496465953, "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.0.block_sparse_moe": [ { "accuracy": 0.8324242069532997, "total_bits": 1581846784, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.8388489963192689, "total_bits": 1636897024, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.8565590291431076, "total_bits": 1829089280, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.8607355258182475, "total_bits": 2051911680, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9228841970233541, "total_bits": 2313589120, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9289927174778361, "total_bits": 2371489792, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9353208004644042, "total_bits": 2549817728, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9612029226202714, "total_bits": 2914965888, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.964529751064746, "total_bits": 2957905920, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9617070203745052, "total_bits": 3006173568, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9665895215186634, "total_bits": 3064074240, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9808745485191283, "total_bits": 3698758016, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9836898759791726, "total_bits": 3756658688, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9897603520535325, "total_bits": 4278096256, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9903831579232294, "total_bits": 4441539584, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.991698085747071, "total_bits": 4839998464, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.997267104807849, "total_bits": 5662082048, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.1.self_attn": [ { "accuracy": 0.8521327913591736, "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.8637357546310676, "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.8751458702118773, "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9195249268883153, "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9248873556130811, "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9270870924780243, "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9479517913178394, "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9511575577290434, "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9555047873995806, "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9583136213845328, "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9622990587039998, "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9647127313441352, "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9663331522361228, "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9692848717891857, "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9812996867848071, "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9841903593195113, "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9860342596412489, "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9913815130773735, "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9963134127963138, "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.1.block_sparse_moe": [ { "accuracy": 0.9324584905254214, "total_bits": 1581846784, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9478668086230755, "total_bits": 1636897024, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9493800366395398, "total_bits": 1829089280, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9498187403537726, "total_bits": 2051911680, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9808891446966874, "total_bits": 2313589120, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9875295693544965, "total_bits": 2371489792, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.988905018097476, "total_bits": 2549817728, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9920513479676294, "total_bits": 2914965888, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9938272803433632, "total_bits": 2957905920, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.993752601360412, "total_bits": 3006173568, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9948625652928298, "total_bits": 3064074240, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9969651552627942, "total_bits": 3698758016, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.996982053485944, "total_bits": 3756658688, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9981960501815927, "total_bits": 4278096256, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9982470140645379, "total_bits": 4441539584, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9984907397087418, "total_bits": 4839998464, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.999117652193251, "total_bits": 5662082048, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.2.self_attn": [ { "accuracy": 0.9795441449080643, "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9813775877027135, "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.983051600228799, "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.986047137185539, "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9889153237209508, "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9888666628026649, "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9924452196267483, "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9928540734476164, "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9930738379051419, "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.993209737107942, "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9935248297835259, "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9936493357251349, "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9949891750063551, "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9954101890570631, "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9971753409328429, "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9976039941079522, "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9976401237378779, "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9989218185043061, "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9993825332188097, "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.2.block_sparse_moe": [ { "accuracy": 0.9670179018652753, "total_bits": 1581846784, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.968241252691338, "total_bits": 1636897024, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.971632618261011, "total_bits": 1829089280, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9724238111000312, "total_bits": 2051911680, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9843998258247187, "total_bits": 2313589120, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9856258957695804, "total_bits": 2371489792, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.986865914475761, "total_bits": 2549817728, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9921592393350837, "total_bits": 2914965888, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9928100429181206, "total_bits": 2957905920, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9922667818241998, "total_bits": 3006173568, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.993231785214065, "total_bits": 3064074240, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.996173335631427, "total_bits": 3698758016, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9967265648546776, "total_bits": 3756658688, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9979728608220619, "total_bits": 4278096256, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.99808885968952, "total_bits": 4441539584, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9983284447807819, "total_bits": 4839998464, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9994732041209078, "total_bits": 5662082048, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.3.self_attn": [ { "accuracy": 0.9551259400813203, "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9575085420357554, "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9661188743224269, "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9699701259010717, "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9784623553094111, "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9795324101455902, "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9880765460333542, "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9887280390156727, "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9895580301649476, "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9897787445960077, "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9895667863127432, "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9900656247903642, "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9928545017824754, "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9932616465774021, "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9960276977967863, "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9966938559603142, "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9967310187210771, "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.998578734573369, "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9991499127538287, "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.3.block_sparse_moe": [ { "accuracy": 0.9548824113842688, "total_bits": 1581846784, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9566254253058057, "total_bits": 1636897024, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9615888638715995, "total_bits": 1829089280, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9626979312222255, "total_bits": 2051911680, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9785523645971951, "total_bits": 2313589120, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9803317003932438, "total_bits": 2371489792, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9820941828289315, "total_bits": 2549817728, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9892527189988055, "total_bits": 2914965888, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9901440804824233, "total_bits": 2957905920, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9893892295658588, "total_bits": 3006173568, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9907122704592582, "total_bits": 3064074240, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9947478979776957, "total_bits": 3698758016, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9954981810633877, "total_bits": 3756658688, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9972210151486491, "total_bits": 4278096256, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9973904589852808, "total_bits": 4441539584, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9977305036797923, "total_bits": 4839998464, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.999288983342826, "total_bits": 5662082048, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.4.self_attn": [ { "accuracy": 0.9501384955487753, "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9532344313828569, "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9606703547270674, "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9668476238454643, "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9747759504733902, "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9754207195027879, "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9852170429535603, "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9855685299262404, "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9862806395088372, "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9868911676304905, "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9874015084810948, "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9882506720329586, "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9907431018195654, "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9914250604465211, "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.994904503912518, "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9956280516803657, "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9959938074829743, "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9979615601968315, "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9989524221868793, "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.4.block_sparse_moe": [ { "accuracy": 0.9430409505179054, "total_bits": 1581846784, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9452259920929608, "total_bits": 1636897024, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.952361611747428, "total_bits": 1829089280, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.953863462335185, "total_bits": 2051911680, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9732333503075337, "total_bits": 2313589120, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9754221997174778, "total_bits": 2371489792, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.977813727369434, "total_bits": 2549817728, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9865550903701469, "total_bits": 2914965888, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9875913096199694, "total_bits": 2957905920, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9867197757488803, "total_bits": 3006173568, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9884021188083448, "total_bits": 3064074240, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9934098214695328, "total_bits": 3698758016, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9943592815757974, "total_bits": 3756658688, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.99648185185843, "total_bits": 4278096256, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9967099708186364, "total_bits": 4441539584, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.997184813145156, "total_bits": 4839998464, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9990889086191984, "total_bits": 5662082048, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.5.self_attn": [ { "accuracy": 0.9475822389910096, "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9508471961476301, "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9593624929456335, "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9663599807965129, "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.974280083453969, "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9746846952720692, "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9852304836539062, "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9857582308744129, "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9867243649634091, "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9871439644576687, "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9874821053327698, "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9879955599868768, "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9903234019630441, "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.991094174206649, "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9948129127733409, "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9955492213883094, "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9961505988612771, "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9977290626970658, "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9989589033924092, "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.5.block_sparse_moe": [ { "accuracy": 0.9330970599855247, "total_bits": 1581846784, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9357169303846986, "total_bits": 1636897024, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9442015262810808, "total_bits": 1829089280, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.945981360187656, "total_bits": 2051911680, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9685394747280761, "total_bits": 2313589120, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9711701207255062, "total_bits": 2371489792, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9740049669421033, "total_bits": 2549817728, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9842126996893632, "total_bits": 2914965888, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9854353756496781, "total_bits": 2957905920, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9844052731794747, "total_bits": 3006173568, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9863939782800643, "total_bits": 3064074240, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9922800195452414, "total_bits": 3698758016, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9933836153022161, "total_bits": 3756658688, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9958625629446224, "total_bits": 4278096256, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9961509568812815, "total_bits": 4441539584, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9967106071080228, "total_bits": 4839998464, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9989239935070815, "total_bits": 5662082048, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.6.self_attn": [ { "accuracy": 0.9403435831007204, "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.943042871591292, "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9522734310870108, "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9632584452629089, "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9709139095717355, "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9716128685364598, "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.983950871435043, "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9846181383258418, "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9857672229899388, "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9863739558740666, "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9857180962633145, "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9865654392756129, "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.988739458010777, "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9896529544586021, "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9939449530119371, "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9948137817463201, "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9959104807287651, "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9973848418724772, "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9989016128697825, "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.6.block_sparse_moe": [ { "accuracy": 0.926799679861257, "total_bits": 1581846784, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9295770284768782, "total_bits": 1636897024, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9390443865405886, "total_bits": 1829089280, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9410078419666541, "total_bits": 2051911680, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9655850874750238, "total_bits": 2313589120, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9684472279132981, "total_bits": 2371489792, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.971598464211351, "total_bits": 2549817728, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9828122296909753, "total_bits": 2914965888, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9841036467175734, "total_bits": 2957905920, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9829470692202449, "total_bits": 3006173568, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9851054365403558, "total_bits": 3064074240, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9915641695261002, "total_bits": 3698758016, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9927779776779445, "total_bits": 3756658688, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9955281341619986, "total_bits": 4278096256, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9957901725766102, "total_bits": 4441539584, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9964275332290287, "total_bits": 4839998464, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9988426360896004, "total_bits": 5662082048, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.7.self_attn": [ { "accuracy": 0.9365530625769967, "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9381882597349191, "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9474649635192595, "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9600888077953929, "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9682890978690825, "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9684009453967998, "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9823253581692514, "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9825815390305299, "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9839327674555151, "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9846781739278844, "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.984113915872417, "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9853785711487657, "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9871541570970103, "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9880625561712996, "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9929636530578136, "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9940473428608751, "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9953926458562675, "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9969289508452149, "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9987685527885333, "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.7.block_sparse_moe": [ { "accuracy": 0.9228573219948694, "total_bits": 1581846784, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9256855231758795, "total_bits": 1636897024, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9358362961364419, "total_bits": 1829089280, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9379441061694371, "total_bits": 2051911680, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9635149213044267, "total_bits": 2313589120, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.966613655615794, "total_bits": 2371489792, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9700325820595026, "total_bits": 2549817728, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9816514553110066, "total_bits": 2914965888, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9831282199409447, "total_bits": 2957905920, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9818561818744791, "total_bits": 3006173568, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9841741921968366, "total_bits": 3064074240, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9909754595964363, "total_bits": 3698758016, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9923101421690693, "total_bits": 3756658688, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9952036216732507, "total_bits": 4278096256, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9954999021965226, "total_bits": 4441539584, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9962162414541174, "total_bits": 4839998464, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9987618498577687, "total_bits": 5662082048, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.8.self_attn": [ { "accuracy": 0.9310825231828188, "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9334916837121311, "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9421536724426245, "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9564096921760785, "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9649832523182819, "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9654168255235019, "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9805737694137191, "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9808163152713525, "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9821695956940714, "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9832292308442687, "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9824752989470175, "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9835325159916752, "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9857140314814291, "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9867697178239101, "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9920890752569234, "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9932691788693008, "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9947937725503978, "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9964956084854508, "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9986297639736318, "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.8.block_sparse_moe": [ { "accuracy": 0.9204807552067857, "total_bits": 1581846784, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9234070652409604, "total_bits": 1636897024, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9335175503633524, "total_bits": 1829089280, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9356210229820326, "total_bits": 2051911680, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9623162945438373, "total_bits": 2313589120, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9654689245906315, "total_bits": 2371489792, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.968888534331008, "total_bits": 2549817728, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9811484676442648, "total_bits": 2914965888, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.982538180768882, "total_bits": 2957905920, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9812206204882578, "total_bits": 3006173568, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9836560174528706, "total_bits": 3064074240, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9906556778097231, "total_bits": 3698758016, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.992043827268246, "total_bits": 3756658688, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9950413011063478, "total_bits": 4278096256, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9953316424017478, "total_bits": 4441539584, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9960382048555306, "total_bits": 4839998464, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9987212244672455, "total_bits": 5662082048, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.9.self_attn": [ { "accuracy": 0.9227808321777143, "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9245412763404219, "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9349466119157641, "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9482090630029377, "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9598034725181366, "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9609961889095997, "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9779940328903889, "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9779499097678223, "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9802826994441842, "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9814546593513928, "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9799881231431898, "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9816469519252056, "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9842005922508082, "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9853926774506506, "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9912662639919865, "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.992505823016951, "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.994027387718425, "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9963108478236551, "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9984306024669326, "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.9.block_sparse_moe": [ { "accuracy": 0.9186476008280328, "total_bits": 1581846784, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9215159339732245, "total_bits": 1636897024, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9319214016983384, "total_bits": 1829089280, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9340870174530306, "total_bits": 2051911680, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9614756679848621, "total_bits": 2313589120, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9646684247020044, "total_bits": 2371489792, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9681861304530972, "total_bits": 2549817728, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9807194060106811, "total_bits": 2914965888, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9821537621809464, "total_bits": 2957905920, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9808666882546324, "total_bits": 3006173568, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9832957325699297, "total_bits": 3064074240, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9905135765132543, "total_bits": 3698758016, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9919045056755605, "total_bits": 3756658688, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9949800188310052, "total_bits": 4278096256, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.995298075501954, "total_bits": 4441539584, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9960413019442441, "total_bits": 4839998464, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9987066149999584, "total_bits": 5662082048, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.10.self_attn": [ { "accuracy": 0.9263995758404857, "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9280166604409092, "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9379807053820083, "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9522572022519613, "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9620968166150545, "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9630412202524512, "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9783801015858588, "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9791110296194491, "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9810583774784678, "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9818809954939705, "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.98102107889166, "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9824635798209592, "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.984751472347661, "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9858558184810375, "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9916773429385534, "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9929503999699495, "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9944743210768425, "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9962296926931135, "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9985284238355234, "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.10.block_sparse_moe": [ { "accuracy": 0.9179219334925476, "total_bits": 1581846784, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9208223566805062, "total_bits": 1636897024, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9312317383132482, "total_bits": 1829089280, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9334287992433498, "total_bits": 2051911680, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9611955391929338, "total_bits": 2313589120, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9643507914519623, "total_bits": 2371489792, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.967939004007923, "total_bits": 2549817728, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9807050981509843, "total_bits": 2914965888, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9821274705525291, "total_bits": 2957905920, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9807698768808654, "total_bits": 3006173568, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9832176146538634, "total_bits": 3064074240, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9905040371231735, "total_bits": 3698758016, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9919005270655218, "total_bits": 3756658688, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9950200153122607, "total_bits": 4278096256, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9953120934017199, "total_bits": 4441539584, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9960557815226677, "total_bits": 4839998464, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9987227458531331, "total_bits": 5662082048, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.11.self_attn": [ { "accuracy": 0.9272899478673935, "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9276470020413399, "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9374686552113608, "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9502892423617213, "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9625186936831788, "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9633390976018027, "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9797175423683304, "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9800941193182218, "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9812152203368513, "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9823465504144367, "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9816771441776502, "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9824309838249495, "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9847063060457769, "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9860868631420951, "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.991836195296951, "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9930776141118258, "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9946522794688415, "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9963443699464398, "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9985927237124231, "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.11.block_sparse_moe": [ { "accuracy": 0.9163732097337121, "total_bits": 1581846784, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9193548003309652, "total_bits": 1636897024, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9301097585182441, "total_bits": 1829089280, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9323978831893519, "total_bits": 2051911680, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9602782044951853, "total_bits": 2313589120, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9636333191669301, "total_bits": 2371489792, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9673173698155504, "total_bits": 2549817728, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.980115708159773, "total_bits": 2914965888, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9816270657373887, "total_bits": 2957905920, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9802890293496219, "total_bits": 3006173568, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9827987267880848, "total_bits": 3064074240, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9902477094805554, "total_bits": 3698758016, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9917030130807114, "total_bits": 3756658688, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9948482973329527, "total_bits": 4278096256, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9951870612873647, "total_bits": 4441539584, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9959776910928715, "total_bits": 4839998464, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9986808355346224, "total_bits": 5662082048, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.12.self_attn": [ { "accuracy": 0.9183806439763621, "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9226338867294162, "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9354290675960089, "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.951254987795102, "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9600028915232733, "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9606332534826115, "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9773664190188834, "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9782024943514874, "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9798377019500262, "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9806975354685595, "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9801827449547617, "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.981384973678934, "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.984072589462525, "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9854205264954975, "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9914159666207668, "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9927223878422458, "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9941089358916017, "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9961723701685274, "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.998416797968706, "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.12.block_sparse_moe": [ { "accuracy": 0.9149962277396729, "total_bits": 1581846784, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9180999929575544, "total_bits": 1636897024, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9289694514713789, "total_bits": 1829089280, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9313249023337113, "total_bits": 2051911680, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9596142080661497, "total_bits": 2313589120, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9630738560502466, "total_bits": 2371489792, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9667932666642101, "total_bits": 2549817728, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9795101667803369, "total_bits": 2914965888, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9811005180603579, "total_bits": 2957905920, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9799635790680584, "total_bits": 3006173568, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.982545079418311, "total_bits": 3064074240, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9900827084815031, "total_bits": 3698758016, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9915601911856547, "total_bits": 3756658688, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9946493926874705, "total_bits": 4278096256, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9951088999300018, "total_bits": 4441539584, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9959307032220653, "total_bits": 4839998464, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9986207535410742, "total_bits": 5662082048, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.13.self_attn": [ { "accuracy": 0.9144012908402243, "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9174986803217938, "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9305551852050581, "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.945450622980532, "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.957477915737974, "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9579711731915411, "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9762551714911273, "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9771409908211545, "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9785458887682149, "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9795245103243935, "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9785206440444055, "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9802142043450945, "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9828334983536288, "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9841697327792645, "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9908477053732464, "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9920735385170892, "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9938684424728548, "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9958915048597479, "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9983690507330099, "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.13.block_sparse_moe": [ { "accuracy": 0.9124777630755776, "total_bits": 1581846784, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.91576033614968, "total_bits": 1636897024, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9270481390780524, "total_bits": 1829089280, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9294941776285046, "total_bits": 2051911680, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9585712462859719, "total_bits": 2313589120, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9621162887074446, "total_bits": 2371489792, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9659501760217705, "total_bits": 2549817728, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.979063157188265, "total_bits": 2914965888, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9806926975418863, "total_bits": 2957905920, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9795198769455677, "total_bits": 3006173568, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9821243185648009, "total_bits": 3064074240, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9898580933773988, "total_bits": 3698758016, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9913554838368375, "total_bits": 3756658688, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9945427862265589, "total_bits": 4278096256, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.994988271035254, "total_bits": 4441539584, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.995795306100167, "total_bits": 4839998464, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9985821192263087, "total_bits": 5662082048, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.14.self_attn": [ { "accuracy": 0.916202432623035, "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9180160927537241, "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.92972700062551, "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9459662723698115, "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9582060490195689, "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9586123284932814, "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9754843556959378, "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9761458419655499, "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9778668781448352, "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9793567784424675, "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9788200702322157, "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9802238134863345, "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9824789450748971, "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9839427672620666, "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9904928588749546, "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9920184795842751, "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9936674176715314, "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9956683734260303, "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9983236914880476, "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.14.block_sparse_moe": [ { "accuracy": 0.9074882612024483, "total_bits": 1581846784, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9110314002946803, "total_bits": 1636897024, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9229472097205489, "total_bits": 1829089280, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9256350586288854, "total_bits": 2051911680, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9562946789358792, "total_bits": 2313589120, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9599329980188295, "total_bits": 2371489792, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9640237304725146, "total_bits": 2549817728, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9776939484535864, "total_bits": 2914965888, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9793846583189932, "total_bits": 2957905920, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.978274950365487, "total_bits": 3006173568, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9810984523868874, "total_bits": 3064074240, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9892634971459445, "total_bits": 3698758016, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.990830084210948, "total_bits": 3756658688, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.994142733318241, "total_bits": 4278096256, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9946599451359361, "total_bits": 4441539584, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9955196939041152, "total_bits": 4839998464, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9984644169147175, "total_bits": 5662082048, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.15.self_attn": [ { "accuracy": 0.910214204733309, "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9127265961938783, "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9264898219783055, "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9431474404899698, "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9547215639368484, "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9550610188590853, "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9738762601229706, "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9748681478790546, "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9769428834613216, "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9780246798243177, "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9770336125634218, "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9786481051460693, "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9816839022954044, "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9834264408012754, "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9898895459543717, "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9916064394392857, "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.993178044960491, "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9957729683772317, "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9981960479574474, "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.15.block_sparse_moe": [ { "accuracy": 0.896523899545795, "total_bits": 1581846784, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9002315245176616, "total_bits": 1636897024, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9157736024966365, "total_bits": 1829089280, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9195202847844676, "total_bits": 2051911680, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9502214640378952, "total_bits": 2313589120, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9547426288849429, "total_bits": 2371489792, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9600102658334532, "total_bits": 2549817728, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9741458232073408, "total_bits": 2914965888, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9763929899781942, "total_bits": 2957905920, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9748979177522031, "total_bits": 3006173568, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9782903827236671, "total_bits": 3064074240, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9873457340229499, "total_bits": 3698758016, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9892727465760943, "total_bits": 3756658688, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9930088063701987, "total_bits": 4278096256, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9935342693995488, "total_bits": 4441539584, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9946590676462572, "total_bits": 4839998464, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9981502383322406, "total_bits": 5662082048, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.16.self_attn": [ { "accuracy": 0.9248751519541991, "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9273689406874933, "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9380726167245915, "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9524976234687001, "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9621274505594843, "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9629117920994759, "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9766717172766987, "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9781217413807386, "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9796651428271281, "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.980605088321394, "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.98085281512651, "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9822097716264819, "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9844745430899294, "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.985576358035599, "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9914003307931125, "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9927930525553069, "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.993938328771803, "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9961202110433461, "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9983754179219863, "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.16.block_sparse_moe": [ { "accuracy": 0.9019071718579844, "total_bits": 1581846784, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.905277032601206, "total_bits": 1636897024, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9202075926096815, "total_bits": 1829089280, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9238606767826959, "total_bits": 2051911680, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9534354803985671, "total_bits": 2313589120, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9574646944866368, "total_bits": 2371489792, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9627644543192888, "total_bits": 2549817728, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.976499177123371, "total_bits": 2914965888, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9785081886833435, "total_bits": 2957905920, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9770585900956863, "total_bits": 3006173568, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9801160721598488, "total_bits": 3064074240, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9887693278050345, "total_bits": 3698758016, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9904636533097609, "total_bits": 3756658688, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.993983720894903, "total_bits": 4278096256, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.994502533116917, "total_bits": 4441539584, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9955992466083875, "total_bits": 4839998464, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9983886312938443, "total_bits": 5662082048, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.17.self_attn": [ { "accuracy": 0.927353847967951, "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9329249196146664, "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.943610639752526, "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9554822593927383, "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9631625336447829, "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9658305678320558, "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9789580759151202, "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9800687248965627, "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9813235665328408, "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9820889919310024, "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9819212372561819, "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9832243367441391, "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.985870974334447, "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9867824900797323, "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9919136155053581, "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9933549004459852, "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9943032974230224, "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9966109868531164, "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9984831992156902, "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.17.block_sparse_moe": [ { "accuracy": 0.9048774967852392, "total_bits": 1581846784, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9083912894129753, "total_bits": 1636897024, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9211463316490776, "total_bits": 1829089280, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9240363555911341, "total_bits": 2051911680, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9553846740408948, "total_bits": 2313589120, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9590523670378485, "total_bits": 2371489792, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9634676532525766, "total_bits": 2549817728, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9773717696141255, "total_bits": 2914965888, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9791323247022534, "total_bits": 2957905920, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9779354223589364, "total_bits": 3006173568, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9807450296847444, "total_bits": 3064074240, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9890571342487084, "total_bits": 3698758016, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9906672552031907, "total_bits": 3756658688, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9940811393684462, "total_bits": 4278096256, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9945923208234537, "total_bits": 4441539584, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9954917905484572, "total_bits": 4839998464, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9984438416033395, "total_bits": 5662082048, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.18.self_attn": [ { "accuracy": 0.9426621355508503, "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9423443537793661, "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9530246604822183, "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9621052176348472, "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9709830131185683, "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9718570904316086, "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9808978106532442, "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9825035657635645, "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9844047992833351, "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9850903565652276, "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9853769444223297, "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.986380965497933, "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9881471818018901, "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9892791555214085, "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9933352529218322, "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9945772679179514, "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9950140060268735, "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9972415298260259, "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9985959064215422, "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.18.block_sparse_moe": [ { "accuracy": 0.9095662926372728, "total_bits": 1581846784, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.912707301935083, "total_bits": 1636897024, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9245913122829638, "total_bits": 1829089280, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9272654946697385, "total_bits": 2051911680, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9575127143608897, "total_bits": 2313589120, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.960948856155339, "total_bits": 2371489792, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.965100116145454, "total_bits": 2549817728, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9786706856010776, "total_bits": 2914965888, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9803212548753149, "total_bits": 2957905920, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.978994027662434, "total_bits": 3006173568, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9816417630859896, "total_bits": 3064074240, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9896233837904507, "total_bits": 3698758016, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9911354199591044, "total_bits": 3756658688, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9944868792632693, "total_bits": 4278096256, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9948771291804549, "total_bits": 4441539584, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9957250996345752, "total_bits": 4839998464, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9985609189055762, "total_bits": 5662082048, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.19.self_attn": [ { "accuracy": 0.9370661077922896, "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9386997762087145, "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9530433086972487, "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9605018681797542, "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9676079006963655, "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9704811785762247, "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9798381700621623, "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9818890725605582, "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.983230293973496, "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9836836655771262, "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9850478390917966, "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9858068748328247, "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9880467953748608, "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9892469397746027, "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9934651391139548, "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9945724511053413, "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9947791119890386, "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9974138994921783, "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9985662904371949, "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.19.block_sparse_moe": [ { "accuracy": 0.9111451659547656, "total_bits": 1581846784, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9142973307324083, "total_bits": 1636897024, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9255013999186064, "total_bits": 1829089280, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9280337201137292, "total_bits": 2051911680, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.957983608994829, "total_bits": 2313589120, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9614456116564964, "total_bits": 2371489792, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9654145234901654, "total_bits": 2549817728, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.978855999382703, "total_bits": 2914965888, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9805044677402628, "total_bits": 2957905920, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9791233171953967, "total_bits": 3006173568, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9818246230286988, "total_bits": 3064074240, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9896588750232599, "total_bits": 3698758016, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9911828828289321, "total_bits": 3756658688, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.994448157689093, "total_bits": 4278096256, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9948650564214117, "total_bits": 4441539584, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9956253841274271, "total_bits": 4839998464, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9985413056381635, "total_bits": 5662082048, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.20.self_attn": [ { "accuracy": 0.9457030035555363, "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9457965995136061, "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9582316289214712, "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9663739424002797, "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9724503584990376, "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9729552895418907, "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.982453209249989, "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9837940367624948, "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.983282304712032, "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.983277547271236, "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9859663481382948, "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9872883571507899, "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9890850506918994, "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9898241735553663, "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9939548614385881, "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9949813122787562, "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9952861121365506, "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9974552460112854, "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9987537633247772, "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.20.block_sparse_moe": [ { "accuracy": 0.9119468076448691, "total_bits": 1581846784, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9149409561957184, "total_bits": 1636897024, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9257604556256219, "total_bits": 1829089280, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9282039166673234, "total_bits": 2051911680, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9583988029901919, "total_bits": 2313589120, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9617315246478507, "total_bits": 2371489792, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9655564200917357, "total_bits": 2549817728, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9791019399601378, "total_bits": 2914965888, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9807611211742225, "total_bits": 2957905920, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9794069032037729, "total_bits": 3006173568, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9820101140066981, "total_bits": 3064074240, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.989843503988691, "total_bits": 3698758016, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9913209057903212, "total_bits": 3756658688, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9946271568463233, "total_bits": 4278096256, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9949970869837623, "total_bits": 4441539584, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9957431105676254, "total_bits": 4839998464, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9986027003768342, "total_bits": 5662082048, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.21.self_attn": [ { "accuracy": 0.9420547632402495, "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9446317094721293, "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9559027793768206, "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9638713286503365, "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9709425744061407, "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9718014126349437, "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9812249879499799, "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9827954999514317, "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9842038799175307, "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9848067334509993, "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9859570207956591, "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9869164829877647, "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9887005343150935, "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9897141926186649, "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9936692170847795, "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9948098210764951, "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9950313313226951, "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9973175655192646, "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9986545597264347, "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.21.block_sparse_moe": [ { "accuracy": 0.9138193422634351, "total_bits": 1581846784, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9166999237709924, "total_bits": 1636897024, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9270215869734162, "total_bits": 1829089280, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9293562548333093, "total_bits": 2051911680, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9591238708480408, "total_bits": 2313589120, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9625386849633957, "total_bits": 2371489792, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9662052853719184, "total_bits": 2549817728, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9793876067882306, "total_bits": 2914965888, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9810763406812361, "total_bits": 2957905920, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9798199153554282, "total_bits": 3006173568, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9823928314604258, "total_bits": 3064074240, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9900380968067207, "total_bits": 3698758016, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9914966536391723, "total_bits": 3756658688, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9946434770122563, "total_bits": 4278096256, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.995071840349977, "total_bits": 4441539584, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9957792213015062, "total_bits": 4839998464, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.998579159807904, "total_bits": 5662082048, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.22.self_attn": [ { "accuracy": 0.9471930198763546, "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9498191834672501, "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9585351386156521, "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9663898856624177, "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9733349061325977, "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9750405227471339, "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9819130654397764, "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9843339896515796, "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9855142012728673, "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9860695932844752, "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9868055958987066, "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9880141178636175, "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.989518814597671, "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9904098668576855, "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9941569331200107, "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9952152201930355, "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9953905812734248, "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9975850072101151, "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9987754085940603, "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.22.block_sparse_moe": [ { "accuracy": 0.9176656027373514, "total_bits": 1581846784, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9204154336138776, "total_bits": 1636897024, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9299716722024114, "total_bits": 1829089280, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9321059900286951, "total_bits": 2051911680, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.961089680559541, "total_bits": 2313589120, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9641676722584587, "total_bits": 2371489792, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9675364571770555, "total_bits": 2549817728, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9805391047542033, "total_bits": 2914965888, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9820854463369438, "total_bits": 2957905920, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9807925557129478, "total_bits": 3006173568, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9832148885256365, "total_bits": 3064074240, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9905593977799934, "total_bits": 3698758016, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.991917425349943, "total_bits": 3756658688, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9950281573508523, "total_bits": 4278096256, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9953391299901628, "total_bits": 4441539584, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9959801378120717, "total_bits": 4839998464, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9986904501841453, "total_bits": 5662082048, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.23.self_attn": [ { "accuracy": 0.9524432279561695, "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9530360379109257, "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9641064432890791, "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9699162048728842, "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9746215462096428, "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9762060763804536, "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9833128412597274, "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9853125596909147, "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.986122182227279, "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9864380322396755, "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.987989904555051, "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9885625831390682, "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.99064354779885, "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.991567972704376, "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9945859536283502, "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9956835696571752, "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9955232734914476, "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9978930688046507, "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9988199585135781, "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.23.block_sparse_moe": [ { "accuracy": 0.9204967839545325, "total_bits": 1581846784, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9230595927097296, "total_bits": 1636897024, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9317785262277252, "total_bits": 1829089280, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9337434821615094, "total_bits": 2051911680, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9621659109466955, "total_bits": 2313589120, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9652149861580447, "total_bits": 2371489792, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9683266715391686, "total_bits": 2549817728, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9811087438444558, "total_bits": 2914965888, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9826179286465049, "total_bits": 2957905920, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.981307726255373, "total_bits": 3006173568, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9836539208496872, "total_bits": 3064074240, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9907920802943408, "total_bits": 3698758016, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9921098601955333, "total_bits": 3756658688, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9951306316315344, "total_bits": 4278096256, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9954302021620893, "total_bits": 4441539584, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9960129390421667, "total_bits": 4839998464, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9987152881099304, "total_bits": 5662082048, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.24.self_attn": [ { "accuracy": 0.9499710435537916, "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.950662459786001, "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9622133842816478, "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.968604315072298, "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9742949208930919, "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9749429447478369, "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.98349716043786, "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9839948990725373, "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9858845195880062, "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9862351651842657, "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9871628187400731, "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9879845070505613, "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9900748384381203, "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9908129974562478, "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9943481932492241, "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9954483362012788, "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9953681989205315, "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9977228532385963, "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9987915921466131, "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.24.block_sparse_moe": [ { "accuracy": 0.9221052409786927, "total_bits": 1581846784, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9246442421878639, "total_bits": 1636897024, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9328722651851804, "total_bits": 1829089280, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9347451446872008, "total_bits": 2051911680, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.962751935187139, "total_bits": 2313589120, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9657756126436748, "total_bits": 2371489792, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9687173461639568, "total_bits": 2549817728, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9813323744425648, "total_bits": 2914965888, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9828240042062182, "total_bits": 2957905920, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9815501413847271, "total_bits": 3006173568, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.983885766546193, "total_bits": 3064074240, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9908952838496158, "total_bits": 3698758016, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.992202242027576, "total_bits": 3756658688, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9951759405728233, "total_bits": 4278096256, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9954795483009595, "total_bits": 4441539584, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9960276171638581, "total_bits": 4839998464, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9987286387752791, "total_bits": 5662082048, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.25.self_attn": [ { "accuracy": 0.9513595190487409, "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9528722892466345, "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9632582637040239, "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9693719534889648, "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9753816164049663, "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9758248702671967, "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9832230314220253, "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9844039228107584, "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9860253931072197, "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9864884662000757, "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9873601074183458, "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9883699642592355, "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9902751693551085, "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9910399240049484, "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9944777478292388, "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9956043592384575, "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.995558013588092, "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.997688677866551, "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9988272385457285, "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.25.block_sparse_moe": [ { "accuracy": 0.9221451184467265, "total_bits": 1581846784, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9246156305858964, "total_bits": 1636897024, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9324987399342813, "total_bits": 1829089280, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9342960805485123, "total_bits": 2051911680, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9627234475981248, "total_bits": 2313589120, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9657524878061131, "total_bits": 2371489792, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9685872759866088, "total_bits": 2549817728, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9812466469838431, "total_bits": 2914965888, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9827800785730544, "total_bits": 2957905920, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9815366607845614, "total_bits": 3006173568, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9838506254416547, "total_bits": 3064074240, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9908647740161732, "total_bits": 3698758016, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9921978681691384, "total_bits": 3756658688, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9951590524471708, "total_bits": 4278096256, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9954690486070161, "total_bits": 4441539584, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9959926817958292, "total_bits": 4839998464, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9987237213216232, "total_bits": 5662082048, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.26.self_attn": [ { "accuracy": 0.9486259669065475, "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9516875302713168, "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9609920637387979, "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9688636825272912, "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9745490510800952, "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9752972233844431, "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9835192375865421, "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9848933926244315, "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9858258776856881, "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9864517002807636, "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9871907775339327, "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9879473856601276, "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9899085606203267, "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9905517408379206, "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9944266022771204, "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9953044906356617, "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9957428037458541, "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9975238920213949, "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.998822687552188, "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.26.block_sparse_moe": [ { "accuracy": 0.9222756347766048, "total_bits": 1581846784, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9248578573920225, "total_bits": 1636897024, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9327644543036034, "total_bits": 1829089280, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9345976147604615, "total_bits": 2051911680, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.962923229034794, "total_bits": 2313589120, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9659108238196686, "total_bits": 2371489792, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9687649021415334, "total_bits": 2549817728, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.98142835791958, "total_bits": 2914965888, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9829370938241482, "total_bits": 2957905920, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9817414911659924, "total_bits": 3006173568, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9840268836307683, "total_bits": 3064074240, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9910298103564664, "total_bits": 3698758016, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9923039041704645, "total_bits": 3756658688, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9952229320463774, "total_bits": 4278096256, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9955559315061883, "total_bits": 4441539584, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9960958779372863, "total_bits": 4839998464, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9987341178654644, "total_bits": 5662082048, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.27.self_attn": [ { "accuracy": 0.9442517226071734, "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9469829584030729, "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9566318827044022, "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.964533438023768, "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9715799759877356, "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9729564628122669, "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.981406915540758, "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9823053215973472, "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9838362307238736, "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9845881582188764, "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9860564412451104, "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9871776697568988, "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9887215339725739, "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9896244840010217, "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9936470297704402, "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9948371632857934, "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9949522354566541, "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9972281611132386, "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9986772067832613, "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.27.block_sparse_moe": [ { "accuracy": 0.9208881749134314, "total_bits": 1581846784, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9235014735083831, "total_bits": 1636897024, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9314361455801287, "total_bits": 1829089280, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9333208040579369, "total_bits": 2051911680, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9619845160724301, "total_bits": 2313589120, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9650827256080351, "total_bits": 2371489792, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9679535704811937, "total_bits": 2549817728, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9807602280848905, "total_bits": 2914965888, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9823299683630466, "total_bits": 2957905920, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9811228912715849, "total_bits": 3006173568, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9835154746021879, "total_bits": 3064074240, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9906613345895159, "total_bits": 3698758016, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9919989972718453, "total_bits": 3756658688, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9949931211181378, "total_bits": 4278096256, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9953539220518187, "total_bits": 4441539584, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.995889257566121, "total_bits": 4839998464, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9986937264850559, "total_bits": 5662082048, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.28.self_attn": [ { "accuracy": 0.9372522989778143, "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9399709017261079, "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.953845058518805, "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9635215137938136, "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9690023592036021, "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9697745632576315, "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9815021029330397, "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.982008520317705, "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9835018401867465, "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9843551027343461, "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9844869119850429, "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9856589606619979, "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9882959338199151, "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9892216198832581, "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9935238386357301, "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9945825652562474, "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9950757999046657, "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9972921312345486, "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.998667761125896, "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.28.block_sparse_moe": [ { "accuracy": 0.9171708159540829, "total_bits": 1581846784, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9199309633358529, "total_bits": 1636897024, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9283658834664446, "total_bits": 1829089280, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9304677865615016, "total_bits": 2051911680, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9604277128451749, "total_bits": 2313589120, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9636236376276142, "total_bits": 2371489792, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9667280017349281, "total_bits": 2549817728, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9800177510631712, "total_bits": 2914965888, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9816904697861326, "total_bits": 2957905920, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9805088709843787, "total_bits": 3006173568, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9829444338224436, "total_bits": 3064074240, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9904319891510042, "total_bits": 3698758016, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9917925807500356, "total_bits": 3756658688, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9948608006816357, "total_bits": 4278096256, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9952604927269644, "total_bits": 4441539584, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9958560369590199, "total_bits": 4839998464, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9986312081332711, "total_bits": 5662082048, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.29.self_attn": [ { "accuracy": 0.9399542720302155, "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9415980632367887, "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9550248304087865, "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9628188687523729, "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9701426660543994, "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9701379417000633, "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9820624949704659, "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.982790415518378, "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9843628367801246, "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.984925673735377, "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9853824486857966, "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9860384722093218, "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9884231762078247, "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9896586935133919, "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9938314549722954, "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9948071711451599, "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9953192318789661, "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9972958471258416, "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9987246007903626, "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.29.block_sparse_moe": [ { "accuracy": 0.9114951401164657, "total_bits": 1581846784, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9145057397453409, "total_bits": 1636897024, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9236511982193119, "total_bits": 1829089280, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9259848980919311, "total_bits": 2051911680, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9569801417620558, "total_bits": 2313589120, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9608506985792988, "total_bits": 2371489792, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9642969829667556, "total_bits": 2549817728, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9780479350470399, "total_bits": 2914965888, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9799381740097153, "total_bits": 2957905920, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.978538320154736, "total_bits": 3006173568, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9814430636010671, "total_bits": 3064074240, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9893465798936392, "total_bits": 3698758016, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9910064476885294, "total_bits": 3756658688, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9941166113375833, "total_bits": 4278096256, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9947050358413866, "total_bits": 4441539584, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9953489905639895, "total_bits": 4839998464, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9984351548329485, "total_bits": 5662082048, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.30.self_attn": [ { "accuracy": 0.9307091906666756, "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9331352622493317, "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9472953494834273, "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9573088438299141, "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9656897741124818, "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9664703729121309, "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9791824818147641, "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9800174725977214, "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9816427059765709, "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9825277818170818, "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9831471301027035, "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9837248535630735, "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9870805295772458, "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9878849405795336, "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.992843183566277, "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9940619997091984, "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9945325405876103, "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9969194934115206, "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.99853286201649, "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.30.block_sparse_moe": [ { "accuracy": 0.8995548851395908, "total_bits": 1581846784, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9034415197215582, "total_bits": 1636897024, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9131332855475576, "total_bits": 1829089280, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9155313472606634, "total_bits": 2051911680, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9499536980139582, "total_bits": 2313589120, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9558768392001328, "total_bits": 2371489792, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9595174865895196, "total_bits": 2549817728, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.975280621530194, "total_bits": 2914965888, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9773026373433439, "total_bits": 2957905920, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9749600906905375, "total_bits": 3006173568, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9791758556016966, "total_bits": 3064074240, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.987613779982846, "total_bits": 3698758016, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9899315809291837, "total_bits": 3756658688, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9932172610914629, "total_bits": 4278096256, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9938334694965497, "total_bits": 4441539584, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9944921806454659, "total_bits": 4839998464, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9981943596963232, "total_bits": 5662082048, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.31.self_attn": [ { "accuracy": 0.9271599058257907, "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9318617582321167, "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9461133338903126, "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9602390233623355, "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9655896744837886, "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9663463224117693, "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9783107272692417, "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9791976095814454, "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.980646914067237, "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9814471597048012, "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9825099036587697, "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9835082461175165, "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9860568437725306, "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9872314710366098, "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9924810869423183, "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9935416606205859, "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9943635753895107, "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9966034654552411, "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9984699864749258, "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.31.block_sparse_moe": [ { "accuracy": 0.8714894950389862, "total_bits": 1581846784, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.8760565417377573, "total_bits": 1636897024, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.886748741920057, "total_bits": 1829089280, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.8896532644958872, "total_bits": 2051911680, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9353843307808826, "total_bits": 2313589120, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9432532896140688, "total_bits": 2371489792, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9471299073805934, "total_bits": 2549817728, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9675060651804271, "total_bits": 2914965888, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9702967721478719, "total_bits": 2957905920, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9674451356067469, "total_bits": 3006173568, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9726809794083238, "total_bits": 3064074240, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9837010373900595, "total_bits": 3698758016, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9866214421528735, "total_bits": 3756658688, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9910344576316052, "total_bits": 4278096256, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9917836127508628, "total_bits": 4441539584, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9925049688838619, "total_bits": 4839998464, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9974016365350077, "total_bits": 5662082048, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.norm.norm": null, "lm_head.linear": null }, "last_module_idx": 66 }