{ "measurement": { "model.layers.0.self_attn": [ { "accuracy": 0.8751417398452759, "total_bits": 89141248, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.8905094861984253, "total_bits": 91697152, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9064652919769287, "total_bits": 95234560, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9348828792572021, "total_bits": 111748096, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9349327683448792, "total_bits": 132388864, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9396864175796509, "total_bits": 132455936, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9437571167945862, "total_bits": 169089024, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.949601411819458, "total_bits": 169221632, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9639348983764648, "total_bits": 170671104, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9655348062515259, "total_bits": 173039616, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9701248407363892, "total_bits": 174398976, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9761642217636108, "total_bits": 175225856, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9720975160598755, "total_bits": 178728960, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9783465266227722, "total_bits": 181067776, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.983639657497406, "total_bits": 219944960, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.987613320350647, "total_bits": 223010816, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9851094484329224, "total_bits": 252975104, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9904950857162476, "total_bits": 265314304, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9936044812202454, "total_bits": 336861184, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.0.block_sparse_moe": [ { "accuracy": 0.8973056077957153, "total_bits": 789481600, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9052672982215881, "total_bits": 817006720, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9214257597923279, "total_bits": 913102848, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9262127876281738, "total_bits": 1024514048, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9601873755455017, "total_bits": 1155352768, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.96399986743927, "total_bits": 1184303104, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9709687232971191, "total_bits": 1273467072, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9780137538909912, "total_bits": 1456041152, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9798977375030518, "total_bits": 1477511168, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9793154001235962, "total_bits": 1501644992, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9813141226768494, "total_bits": 1530595328, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9878038763999939, "total_bits": 1847937216, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.987910807132721, "total_bits": 1876887552, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.990713894367218, "total_bits": 2137606336, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9908143281936646, "total_bits": 2219328000, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9915716052055359, "total_bits": 2418557440, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9919507503509521, "total_bits": 2829599232, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.1.self_attn": [ { "accuracy": 0.884184718132019, "total_bits": 89141248, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.8929663300514221, "total_bits": 91697152, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9100151062011719, "total_bits": 95234560, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9362671971321106, "total_bits": 111748096, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9372715950012207, "total_bits": 132388864, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9423797130584717, "total_bits": 132455936, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.947278618812561, "total_bits": 169089024, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9533457159996033, "total_bits": 169221632, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9633457064628601, "total_bits": 170671104, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9645220041275024, "total_bits": 173039616, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9694881439208984, "total_bits": 174398976, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9744937419891357, "total_bits": 175225856, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9710546135902405, "total_bits": 178728960, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9765797853469849, "total_bits": 181067776, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9826284050941467, "total_bits": 219944960, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9872329235076904, "total_bits": 223010816, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.984351396560669, "total_bits": 252975104, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9916911721229553, "total_bits": 265314304, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9940688610076904, "total_bits": 336861184, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.1.block_sparse_moe": [ { "accuracy": 0.955030083656311, "total_bits": 789481600, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9613854289054871, "total_bits": 817006720, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9649631977081299, "total_bits": 913102848, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.965466320514679, "total_bits": 1024514048, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9891144037246704, "total_bits": 1155352768, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9904773235321045, "total_bits": 1184303104, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9914672374725342, "total_bits": 1273467072, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9934831857681274, "total_bits": 1456041152, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9941635727882385, "total_bits": 1477511168, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9951044321060181, "total_bits": 1501644992, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9946239590644836, "total_bits": 1530595328, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9971449375152588, "total_bits": 1847937216, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9970027804374695, "total_bits": 1876887552, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9978227019309998, "total_bits": 2137606336, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.997911274433136, "total_bits": 2219328000, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9980241656303406, "total_bits": 2418557440, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9980657696723938, "total_bits": 2829599232, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.2.self_attn": [ { "accuracy": 0.9884443879127502, "total_bits": 89141248, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9892497062683105, "total_bits": 91697152, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9903169274330139, "total_bits": 95234560, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.991910457611084, "total_bits": 111748096, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9935137033462524, "total_bits": 132388864, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9937279224395752, "total_bits": 132455936, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9957677125930786, "total_bits": 169089024, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9960666298866272, "total_bits": 169221632, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9962677359580994, "total_bits": 170671104, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9964044690132141, "total_bits": 173039616, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.996424674987793, "total_bits": 174398976, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9968007802963257, "total_bits": 175225856, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.997585117816925, "total_bits": 178728960, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9976766705513, "total_bits": 181067776, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9985828399658203, "total_bits": 219944960, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9987077713012695, "total_bits": 223010816, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9987487196922302, "total_bits": 252975104, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9991167783737183, "total_bits": 265314304, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9993364810943604, "total_bits": 336861184, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.2.block_sparse_moe": [ { "accuracy": 0.9837253093719482, "total_bits": 789481600, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.984194278717041, "total_bits": 817006720, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9868018627166748, "total_bits": 913102848, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9874875545501709, "total_bits": 1024514048, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9921532273292542, "total_bits": 1155352768, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9928560853004456, "total_bits": 1184303104, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9938076734542847, "total_bits": 1273467072, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.995986819267273, "total_bits": 1456041152, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9964112639427185, "total_bits": 1477511168, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9960554838180542, "total_bits": 1501644992, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9965893030166626, "total_bits": 1530595328, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9980182647705078, "total_bits": 1847937216, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9981729388237, "total_bits": 1876887552, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.998846709728241, "total_bits": 2137606336, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9989039897918701, "total_bits": 2219328000, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9990532994270325, "total_bits": 2418557440, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9993494153022766, "total_bits": 2829599232, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.3.self_attn": [ { "accuracy": 0.9850694537162781, "total_bits": 89141248, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9860561490058899, "total_bits": 91697152, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9880993366241455, "total_bits": 95234560, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9910649657249451, "total_bits": 111748096, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9917168617248535, "total_bits": 132388864, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9920117855072021, "total_bits": 132455936, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9934768676757812, "total_bits": 169089024, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9938939809799194, "total_bits": 169221632, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9948017597198486, "total_bits": 170671104, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.994970977306366, "total_bits": 173039616, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9959473609924316, "total_bits": 174398976, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9963490962982178, "total_bits": 175225856, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9964137673377991, "total_bits": 178728960, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9968531727790833, "total_bits": 181067776, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9979065656661987, "total_bits": 219944960, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.998172402381897, "total_bits": 223010816, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9981381297111511, "total_bits": 252975104, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.998751163482666, "total_bits": 265314304, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9990325570106506, "total_bits": 336861184, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.3.block_sparse_moe": [ { "accuracy": 0.978577733039856, "total_bits": 789481600, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9791688323020935, "total_bits": 817006720, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9823892116546631, "total_bits": 913102848, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9832536578178406, "total_bits": 1024514048, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9895961880683899, "total_bits": 1155352768, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9905025959014893, "total_bits": 1184303104, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9917021989822388, "total_bits": 1273467072, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9947170615196228, "total_bits": 1456041152, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9952441453933716, "total_bits": 1477511168, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9947661757469177, "total_bits": 1501644992, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9954683184623718, "total_bits": 1530595328, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9973694682121277, "total_bits": 1847937216, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9975910186767578, "total_bits": 1876887552, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9984795451164246, "total_bits": 2137606336, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9985506534576416, "total_bits": 2219328000, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9987359642982483, "total_bits": 2418557440, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9991334080696106, "total_bits": 2829599232, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.4.self_attn": [ { "accuracy": 0.9843248128890991, "total_bits": 89141248, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9853833317756653, "total_bits": 91697152, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.987926185131073, "total_bits": 95234560, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9903342723846436, "total_bits": 111748096, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9916975498199463, "total_bits": 132388864, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9919493198394775, "total_bits": 132455936, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9934250116348267, "total_bits": 169089024, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9937542080879211, "total_bits": 169221632, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9946245551109314, "total_bits": 170671104, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9948005676269531, "total_bits": 173039616, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9957748651504517, "total_bits": 174398976, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9961651563644409, "total_bits": 175225856, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9961907267570496, "total_bits": 178728960, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9967246055603027, "total_bits": 181067776, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9976656436920166, "total_bits": 219944960, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9979232549667358, "total_bits": 223010816, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9978621006011963, "total_bits": 252975104, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.998350203037262, "total_bits": 265314304, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9984957575798035, "total_bits": 336861184, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.4.block_sparse_moe": [ { "accuracy": 0.9744744896888733, "total_bits": 789481600, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9752852916717529, "total_bits": 817006720, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9792431592941284, "total_bits": 913102848, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.980274498462677, "total_bits": 1024514048, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9876105189323425, "total_bits": 1155352768, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9887597560882568, "total_bits": 1184303104, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9901764392852783, "total_bits": 1273467072, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9936513900756836, "total_bits": 1456041152, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9943193793296814, "total_bits": 1477511168, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9937913417816162, "total_bits": 1501644992, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.994633674621582, "total_bits": 1530595328, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9968817830085754, "total_bits": 1847937216, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9971445798873901, "total_bits": 1876887552, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9981667399406433, "total_bits": 2137606336, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9982768893241882, "total_bits": 2219328000, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9984943270683289, "total_bits": 2418557440, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9989427328109741, "total_bits": 2829599232, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.5.self_attn": [ { "accuracy": 0.9817304611206055, "total_bits": 89141248, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9826335310935974, "total_bits": 91697152, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9855250716209412, "total_bits": 95234560, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9887627363204956, "total_bits": 111748096, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9905913472175598, "total_bits": 132388864, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9909543991088867, "total_bits": 132455936, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9930394291877747, "total_bits": 169089024, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9933953285217285, "total_bits": 169221632, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9943956136703491, "total_bits": 170671104, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9945162534713745, "total_bits": 173039616, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9954431056976318, "total_bits": 174398976, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9957863092422485, "total_bits": 175225856, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9960880875587463, "total_bits": 178728960, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9964391589164734, "total_bits": 181067776, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9977391362190247, "total_bits": 219944960, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9978582262992859, "total_bits": 223010816, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9980574250221252, "total_bits": 252975104, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9984526038169861, "total_bits": 265314304, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9989938139915466, "total_bits": 336861184, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.5.block_sparse_moe": [ { "accuracy": 0.9707704782485962, "total_bits": 789481600, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.971696138381958, "total_bits": 817006720, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9762418270111084, "total_bits": 913102848, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9774571657180786, "total_bits": 1024514048, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9858121275901794, "total_bits": 1155352768, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9870712161064148, "total_bits": 1184303104, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9887317419052124, "total_bits": 1273467072, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9927024841308594, "total_bits": 1456041152, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9934526681900024, "total_bits": 1477511168, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9929062724113464, "total_bits": 1501644992, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9938467144966125, "total_bits": 1530595328, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9964475035667419, "total_bits": 1847937216, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9967588186264038, "total_bits": 1876887552, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9979418516159058, "total_bits": 2137606336, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.998079240322113, "total_bits": 2219328000, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9983417987823486, "total_bits": 2418557440, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9988794922828674, "total_bits": 2829599232, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.6.self_attn": [ { "accuracy": 0.9804203510284424, "total_bits": 89141248, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.981393575668335, "total_bits": 91697152, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9839239716529846, "total_bits": 95234560, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9879498481750488, "total_bits": 111748096, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9901560544967651, "total_bits": 132388864, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9904938340187073, "total_bits": 132455936, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9929506182670593, "total_bits": 169089024, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.993294358253479, "total_bits": 169221632, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9938555955886841, "total_bits": 170671104, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9941362142562866, "total_bits": 173039616, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9951253533363342, "total_bits": 174398976, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.995417058467865, "total_bits": 175225856, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9956203103065491, "total_bits": 178728960, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9960533380508423, "total_bits": 181067776, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9975101351737976, "total_bits": 219944960, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9975008368492126, "total_bits": 223010816, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.997912585735321, "total_bits": 252975104, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9979925751686096, "total_bits": 265314304, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9987144470214844, "total_bits": 336861184, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.6.block_sparse_moe": [ { "accuracy": 0.9683820605278015, "total_bits": 789481600, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9693868160247803, "total_bits": 817006720, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9745656847953796, "total_bits": 913102848, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9759662747383118, "total_bits": 1024514048, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9846575260162354, "total_bits": 1155352768, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9860272407531738, "total_bits": 1184303104, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9879274964332581, "total_bits": 1273467072, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9920893907546997, "total_bits": 1456041152, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9928973913192749, "total_bits": 1477511168, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9923037886619568, "total_bits": 1501644992, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9933342337608337, "total_bits": 1530595328, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9961398839950562, "total_bits": 1847937216, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.996465265750885, "total_bits": 1876887552, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9977338910102844, "total_bits": 2137606336, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9978920221328735, "total_bits": 2219328000, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9981920719146729, "total_bits": 2418557440, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9987471103668213, "total_bits": 2829599232, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.7.self_attn": [ { "accuracy": 0.9771181344985962, "total_bits": 89141248, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9782307744026184, "total_bits": 91697152, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9806435704231262, "total_bits": 95234560, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9853160381317139, "total_bits": 111748096, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9885859489440918, "total_bits": 132388864, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9889297485351562, "total_bits": 132455936, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9919551014900208, "total_bits": 169089024, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9923657178878784, "total_bits": 169221632, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9928861260414124, "total_bits": 170671104, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9932371973991394, "total_bits": 173039616, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9942978620529175, "total_bits": 174398976, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9947563409805298, "total_bits": 175225856, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9950579404830933, "total_bits": 178728960, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9953710436820984, "total_bits": 181067776, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9972236156463623, "total_bits": 219944960, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9972934722900391, "total_bits": 223010816, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9977613687515259, "total_bits": 252975104, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9978917241096497, "total_bits": 265314304, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.998673677444458, "total_bits": 336861184, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.7.block_sparse_moe": [ { "accuracy": 0.9660626649856567, "total_bits": 789481600, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9671629071235657, "total_bits": 817006720, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9724810123443604, "total_bits": 913102848, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9739189147949219, "total_bits": 1024514048, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.983582079410553, "total_bits": 1155352768, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9850480556488037, "total_bits": 1184303104, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9869869351387024, "total_bits": 1273467072, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9915695786476135, "total_bits": 1456041152, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9924119114875793, "total_bits": 1477511168, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9917653203010559, "total_bits": 1501644992, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9928714036941528, "total_bits": 1530595328, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9958668351173401, "total_bits": 1847937216, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9962002038955688, "total_bits": 1876887552, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9975576996803284, "total_bits": 2137606336, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9977165460586548, "total_bits": 2219328000, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9980207681655884, "total_bits": 2418557440, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.998611569404602, "total_bits": 2829599232, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.8.self_attn": [ { "accuracy": 0.9759218692779541, "total_bits": 89141248, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9769536256790161, "total_bits": 91697152, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9795733094215393, "total_bits": 95234560, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9851830005645752, "total_bits": 111748096, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9876247644424438, "total_bits": 132388864, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9881879687309265, "total_bits": 132455936, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9913622140884399, "total_bits": 169089024, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9921379089355469, "total_bits": 169221632, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9926151037216187, "total_bits": 170671104, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9932717084884644, "total_bits": 173039616, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9939464926719666, "total_bits": 174398976, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9944887757301331, "total_bits": 175225856, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9948018193244934, "total_bits": 178728960, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9951046109199524, "total_bits": 181067776, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9966264963150024, "total_bits": 219944960, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9971638321876526, "total_bits": 223010816, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9972131848335266, "total_bits": 252975104, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9978854656219482, "total_bits": 265314304, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9981712698936462, "total_bits": 336861184, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.8.block_sparse_moe": [ { "accuracy": 0.9641352891921997, "total_bits": 789481600, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9652677178382874, "total_bits": 817006720, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9708629250526428, "total_bits": 913102848, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9723686575889587, "total_bits": 1024514048, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9826596975326538, "total_bits": 1155352768, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9841823577880859, "total_bits": 1184303104, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9862193465232849, "total_bits": 1273467072, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9910719394683838, "total_bits": 1456041152, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.991970956325531, "total_bits": 1477511168, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.991306483745575, "total_bits": 1501644992, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9924567341804504, "total_bits": 1530595328, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9956172704696655, "total_bits": 1847937216, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9959607720375061, "total_bits": 1876887552, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9973883628845215, "total_bits": 2137606336, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9975603222846985, "total_bits": 2219328000, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9978747367858887, "total_bits": 2418557440, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9984854459762573, "total_bits": 2829599232, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.9.self_attn": [ { "accuracy": 0.9738945364952087, "total_bits": 89141248, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9746870994567871, "total_bits": 91697152, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9769048094749451, "total_bits": 95234560, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9819870591163635, "total_bits": 111748096, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9870967864990234, "total_bits": 132388864, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9873493313789368, "total_bits": 132455936, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9910304546356201, "total_bits": 169089024, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9915170669555664, "total_bits": 169221632, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9923102855682373, "total_bits": 170671104, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9925001859664917, "total_bits": 173039616, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9935962557792664, "total_bits": 174398976, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9936215281486511, "total_bits": 175225856, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9938299059867859, "total_bits": 178728960, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9944369196891785, "total_bits": 181067776, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9966698288917542, "total_bits": 219944960, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9964092969894409, "total_bits": 223010816, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9973581433296204, "total_bits": 252975104, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9970427751541138, "total_bits": 265314304, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9983050227165222, "total_bits": 336861184, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.9.block_sparse_moe": [ { "accuracy": 0.9617859125137329, "total_bits": 789481600, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.963042140007019, "total_bits": 817006720, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9690362215042114, "total_bits": 913102848, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9706609845161438, "total_bits": 1024514048, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9814810156822205, "total_bits": 1155352768, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9831225872039795, "total_bits": 1184303104, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9853349924087524, "total_bits": 1273467072, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9904524087905884, "total_bits": 1456041152, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9914065599441528, "total_bits": 1477511168, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9906842708587646, "total_bits": 1501644992, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.991934061050415, "total_bits": 1530595328, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9953192472457886, "total_bits": 1847937216, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9956946969032288, "total_bits": 1876887552, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9972219467163086, "total_bits": 2137606336, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9974049925804138, "total_bits": 2219328000, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9977452754974365, "total_bits": 2418557440, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9984101057052612, "total_bits": 2829599232, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.10.self_attn": [ { "accuracy": 0.9707123637199402, "total_bits": 89141248, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9720385670661926, "total_bits": 91697152, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9746705293655396, "total_bits": 95234560, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9810404181480408, "total_bits": 111748096, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9852327704429626, "total_bits": 132388864, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9857784509658813, "total_bits": 132455936, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9901778101921082, "total_bits": 169089024, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9909883737564087, "total_bits": 169221632, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9915879368782043, "total_bits": 170671104, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9921466112136841, "total_bits": 173039616, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.992764413356781, "total_bits": 174398976, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.993274986743927, "total_bits": 175225856, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9937085509300232, "total_bits": 178728960, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9941796660423279, "total_bits": 181067776, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9964044094085693, "total_bits": 219944960, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9967056512832642, "total_bits": 223010816, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9972561001777649, "total_bits": 252975104, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9976088404655457, "total_bits": 265314304, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9984726309776306, "total_bits": 336861184, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.10.block_sparse_moe": [ { "accuracy": 0.960101306438446, "total_bits": 789481600, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9614546298980713, "total_bits": 817006720, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9680789709091187, "total_bits": 913102848, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9699031710624695, "total_bits": 1024514048, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.980581521987915, "total_bits": 1155352768, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9823258519172668, "total_bits": 1184303104, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9847913384437561, "total_bits": 1273467072, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9898972511291504, "total_bits": 1456041152, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9909461140632629, "total_bits": 1477511168, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9902380704879761, "total_bits": 1501644992, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9915485978126526, "total_bits": 1530595328, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9950852990150452, "total_bits": 1847937216, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.995482325553894, "total_bits": 1876887552, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.997085452079773, "total_bits": 2137606336, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9972910284996033, "total_bits": 2219328000, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9976792931556702, "total_bits": 2418557440, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9983670711517334, "total_bits": 2829599232, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.11.self_attn": [ { "accuracy": 0.9664958119392395, "total_bits": 89141248, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9680699110031128, "total_bits": 91697152, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9711652994155884, "total_bits": 95234560, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.978232204914093, "total_bits": 111748096, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9826652407646179, "total_bits": 132388864, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9834066033363342, "total_bits": 132455936, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9875680208206177, "total_bits": 169089024, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9886051416397095, "total_bits": 169221632, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9898054003715515, "total_bits": 170671104, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9905601143836975, "total_bits": 173039616, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9917736053466797, "total_bits": 174398976, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9922497868537903, "total_bits": 175225856, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9928052425384521, "total_bits": 178728960, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9929709434509277, "total_bits": 181067776, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9959088563919067, "total_bits": 219944960, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.996296763420105, "total_bits": 223010816, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9968017935752869, "total_bits": 252975104, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9973285794258118, "total_bits": 265314304, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9983463883399963, "total_bits": 336861184, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.11.block_sparse_moe": [ { "accuracy": 0.9593594074249268, "total_bits": 789481600, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9607359170913696, "total_bits": 817006720, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9674556255340576, "total_bits": 913102848, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9693099856376648, "total_bits": 1024514048, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9802408814430237, "total_bits": 1155352768, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9820162057876587, "total_bits": 1184303104, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9845170974731445, "total_bits": 1273467072, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9896552562713623, "total_bits": 1456041152, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9907470345497131, "total_bits": 1477511168, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9900413751602173, "total_bits": 1501644992, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9913748502731323, "total_bits": 1530595328, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9949579238891602, "total_bits": 1847937216, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9953409433364868, "total_bits": 1876887552, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9969601035118103, "total_bits": 2137606336, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9972264766693115, "total_bits": 2219328000, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9976195096969604, "total_bits": 2418557440, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9983025789260864, "total_bits": 2829599232, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.12.self_attn": [ { "accuracy": 0.9626550674438477, "total_bits": 89141248, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9643545746803284, "total_bits": 91697152, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9685499668121338, "total_bits": 95234560, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9752624034881592, "total_bits": 111748096, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9808992147445679, "total_bits": 132388864, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9814257621765137, "total_bits": 132455936, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9863153100013733, "total_bits": 169089024, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9870270490646362, "total_bits": 169221632, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9880810976028442, "total_bits": 170671104, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9888941049575806, "total_bits": 173039616, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9902970790863037, "total_bits": 174398976, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9908140897750854, "total_bits": 175225856, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9910513162612915, "total_bits": 178728960, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9918023943901062, "total_bits": 181067776, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.99521803855896, "total_bits": 219944960, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9954172968864441, "total_bits": 223010816, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.996193528175354, "total_bits": 252975104, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9967467188835144, "total_bits": 265314304, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9981412887573242, "total_bits": 336861184, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.12.block_sparse_moe": [ { "accuracy": 0.9550244808197021, "total_bits": 789481600, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.95663982629776, "total_bits": 817006720, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9637049436569214, "total_bits": 913102848, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9656469225883484, "total_bits": 1024514048, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9780723452568054, "total_bits": 1155352768, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9800431132316589, "total_bits": 1184303104, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9827015995979309, "total_bits": 1273467072, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9885444641113281, "total_bits": 1456041152, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9896913766860962, "total_bits": 1477511168, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9889053106307983, "total_bits": 1501644992, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9903722405433655, "total_bits": 1530595328, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9943129420280457, "total_bits": 1847937216, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9947198033332825, "total_bits": 1876887552, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9964836835861206, "total_bits": 2137606336, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.996822714805603, "total_bits": 2219328000, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9972203373908997, "total_bits": 2418557440, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9979792833328247, "total_bits": 2829599232, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.13.self_attn": [ { "accuracy": 0.9628887176513672, "total_bits": 89141248, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9645645618438721, "total_bits": 91697152, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9682385325431824, "total_bits": 95234560, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9768204092979431, "total_bits": 111748096, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9810636639595032, "total_bits": 132388864, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9816300868988037, "total_bits": 132455936, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9878069162368774, "total_bits": 169089024, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9885682463645935, "total_bits": 169221632, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9897643327713013, "total_bits": 170671104, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9905561208724976, "total_bits": 173039616, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9908974766731262, "total_bits": 174398976, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9914373755455017, "total_bits": 175225856, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9921591281890869, "total_bits": 178728960, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9925897121429443, "total_bits": 181067776, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9955868721008301, "total_bits": 219944960, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9959070086479187, "total_bits": 223010816, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9968015551567078, "total_bits": 252975104, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9970945715904236, "total_bits": 265314304, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.998440146446228, "total_bits": 336861184, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.13.block_sparse_moe": [ { "accuracy": 0.9511893391609192, "total_bits": 789481600, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9528716206550598, "total_bits": 817006720, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9606553316116333, "total_bits": 913102848, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9628831148147583, "total_bits": 1024514048, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9760656356811523, "total_bits": 1155352768, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9782522320747375, "total_bits": 1184303104, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9811895489692688, "total_bits": 1273467072, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9874409437179565, "total_bits": 1456041152, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9887565970420837, "total_bits": 1477511168, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9879243969917297, "total_bits": 1501644992, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9895527362823486, "total_bits": 1530595328, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9938852787017822, "total_bits": 1847937216, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9943557381629944, "total_bits": 1876887552, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9963069558143616, "total_bits": 2137606336, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9966025352478027, "total_bits": 2219328000, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.997063159942627, "total_bits": 2418557440, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9979091286659241, "total_bits": 2829599232, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.14.self_attn": [ { "accuracy": 0.9569941163063049, "total_bits": 89141248, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9592213034629822, "total_bits": 91697152, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9633221626281738, "total_bits": 95234560, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9725170135498047, "total_bits": 111748096, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9773821830749512, "total_bits": 132388864, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9785560369491577, "total_bits": 132455936, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9846797585487366, "total_bits": 169089024, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9863859415054321, "total_bits": 169221632, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9875389337539673, "total_bits": 170671104, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9883917570114136, "total_bits": 173039616, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9891133308410645, "total_bits": 174398976, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9897931814193726, "total_bits": 175225856, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9905978441238403, "total_bits": 178728960, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9912149906158447, "total_bits": 181067776, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9945464730262756, "total_bits": 219944960, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9951397776603699, "total_bits": 223010816, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9957804679870605, "total_bits": 252975104, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9965304136276245, "total_bits": 265314304, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9980648159980774, "total_bits": 336861184, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.14.block_sparse_moe": [ { "accuracy": 0.9479333758354187, "total_bits": 789481600, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9496796727180481, "total_bits": 817006720, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9578964710235596, "total_bits": 913102848, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9602400064468384, "total_bits": 1024514048, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9744097590446472, "total_bits": 1155352768, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9768279194831848, "total_bits": 1184303104, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9799119830131531, "total_bits": 1273467072, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9863417744636536, "total_bits": 1456041152, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9878924489021301, "total_bits": 1477511168, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.987079381942749, "total_bits": 1501644992, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9887562990188599, "total_bits": 1530595328, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9933900833129883, "total_bits": 1847937216, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9938229322433472, "total_bits": 1876887552, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9959142208099365, "total_bits": 2137606336, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9962368607521057, "total_bits": 2219328000, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9967025518417358, "total_bits": 2418557440, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9975429773330688, "total_bits": 2829599232, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.15.self_attn": [ { "accuracy": 0.9534876346588135, "total_bits": 89141248, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.955331027507782, "total_bits": 91697152, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9602120518684387, "total_bits": 95234560, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9682555794715881, "total_bits": 111748096, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.975091278553009, "total_bits": 132388864, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9762969017028809, "total_bits": 132455936, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9822013974189758, "total_bits": 169089024, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9838548302650452, "total_bits": 169221632, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9854117631912231, "total_bits": 170671104, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9860984086990356, "total_bits": 173039616, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9875822067260742, "total_bits": 174398976, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9888445734977722, "total_bits": 175225856, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9895575046539307, "total_bits": 178728960, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9903632998466492, "total_bits": 181067776, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9934635758399963, "total_bits": 219944960, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9947208762168884, "total_bits": 223010816, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9945287704467773, "total_bits": 252975104, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.996428370475769, "total_bits": 265314304, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9969809651374817, "total_bits": 336861184, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.15.block_sparse_moe": [ { "accuracy": 0.9433757662773132, "total_bits": 789481600, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9452196359634399, "total_bits": 817006720, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9542100429534912, "total_bits": 913102848, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9567641019821167, "total_bits": 1024514048, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9722742438316345, "total_bits": 1155352768, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9746926426887512, "total_bits": 1184303104, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9780974388122559, "total_bits": 1273467072, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9854938983917236, "total_bits": 1456041152, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9869449138641357, "total_bits": 1477511168, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9859891533851624, "total_bits": 1501644992, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.987822949886322, "total_bits": 1530595328, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9928993582725525, "total_bits": 1847937216, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9934495091438293, "total_bits": 1876887552, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9957567453384399, "total_bits": 2137606336, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9960713982582092, "total_bits": 2219328000, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9966015815734863, "total_bits": 2418557440, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9976150393486023, "total_bits": 2829599232, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.16.self_attn": [ { "accuracy": 0.9558289647102356, "total_bits": 89141248, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9574498534202576, "total_bits": 91697152, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9628196954727173, "total_bits": 95234560, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9711607694625854, "total_bits": 111748096, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9773940443992615, "total_bits": 132388864, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9782330989837646, "total_bits": 132455936, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9852445125579834, "total_bits": 169089024, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9863000512123108, "total_bits": 169221632, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9874033331871033, "total_bits": 170671104, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9878862500190735, "total_bits": 173039616, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9888808131217957, "total_bits": 174398976, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9896867275238037, "total_bits": 175225856, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9906224608421326, "total_bits": 178728960, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9914042353630066, "total_bits": 181067776, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9946121573448181, "total_bits": 219944960, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9952240586280823, "total_bits": 223010816, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9958899617195129, "total_bits": 252975104, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9966787099838257, "total_bits": 265314304, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9981092810630798, "total_bits": 336861184, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.16.block_sparse_moe": [ { "accuracy": 0.9367603659629822, "total_bits": 789481600, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9388013482093811, "total_bits": 817006720, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9488231539726257, "total_bits": 913102848, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9517994523048401, "total_bits": 1024514048, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.968896746635437, "total_bits": 1155352768, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9716381430625916, "total_bits": 1184303104, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9754898548126221, "total_bits": 1273467072, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9836094379425049, "total_bits": 1456041152, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9852378964424133, "total_bits": 1477511168, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9842678308486938, "total_bits": 1501644992, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9863329529762268, "total_bits": 1530595328, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9920171499252319, "total_bits": 1847937216, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9926208257675171, "total_bits": 1876887552, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9951805472373962, "total_bits": 2137606336, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9955776333808899, "total_bits": 2219328000, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.996174693107605, "total_bits": 2418557440, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9972937703132629, "total_bits": 2829599232, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.17.self_attn": [ { "accuracy": 0.9520993232727051, "total_bits": 89141248, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9542604088783264, "total_bits": 91697152, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9588183164596558, "total_bits": 95234560, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9690974354743958, "total_bits": 111748096, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9743528366088867, "total_bits": 132388864, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9755899906158447, "total_bits": 132455936, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9818028211593628, "total_bits": 169089024, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9833949208259583, "total_bits": 169221632, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9853978753089905, "total_bits": 170671104, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9861086010932922, "total_bits": 173039616, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9875546097755432, "total_bits": 174398976, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9885079264640808, "total_bits": 175225856, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9890788793563843, "total_bits": 178728960, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.990022599697113, "total_bits": 181067776, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9935770630836487, "total_bits": 219944960, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9946915507316589, "total_bits": 223010816, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9948438405990601, "total_bits": 252975104, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9963769316673279, "total_bits": 265314304, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9973258376121521, "total_bits": 336861184, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.17.block_sparse_moe": [ { "accuracy": 0.9300394058227539, "total_bits": 789481600, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9322806000709534, "total_bits": 817006720, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9433746337890625, "total_bits": 913102848, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9469258189201355, "total_bits": 1024514048, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9654805660247803, "total_bits": 1155352768, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9684857130050659, "total_bits": 1184303104, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9728274941444397, "total_bits": 1273467072, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9816161394119263, "total_bits": 1456041152, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9834931492805481, "total_bits": 1477511168, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9825193881988525, "total_bits": 1501644992, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9847764372825623, "total_bits": 1530595328, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9910696744918823, "total_bits": 1847937216, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9916917085647583, "total_bits": 1876887552, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.994512140750885, "total_bits": 2137606336, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9949963092803955, "total_bits": 2219328000, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9956677556037903, "total_bits": 2418557440, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9968357086181641, "total_bits": 2829599232, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.18.self_attn": [ { "accuracy": 0.9514276385307312, "total_bits": 89141248, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9536514282226562, "total_bits": 91697152, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9587069749832153, "total_bits": 95234560, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9670404195785522, "total_bits": 111748096, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9746701121330261, "total_bits": 132388864, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9757741093635559, "total_bits": 132455936, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9818229675292969, "total_bits": 169089024, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9832161068916321, "total_bits": 169221632, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9847028851509094, "total_bits": 170671104, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9851094484329224, "total_bits": 173039616, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9871813654899597, "total_bits": 174398976, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9883646965026855, "total_bits": 175225856, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9885664582252502, "total_bits": 178728960, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9897820949554443, "total_bits": 181067776, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9927382469177246, "total_bits": 219944960, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9943881034851074, "total_bits": 223010816, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9936663508415222, "total_bits": 252975104, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9962040185928345, "total_bits": 265314304, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9976188540458679, "total_bits": 336861184, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.18.block_sparse_moe": [ { "accuracy": 0.9241556525230408, "total_bits": 789481600, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9266494512557983, "total_bits": 817006720, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9386921525001526, "total_bits": 913102848, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9426650404930115, "total_bits": 1024514048, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9626173973083496, "total_bits": 1155352768, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9658289551734924, "total_bits": 1184303104, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9705883264541626, "total_bits": 1273467072, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9799233078956604, "total_bits": 1456041152, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9819456338882446, "total_bits": 1477511168, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9810093641281128, "total_bits": 1501644992, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9834432601928711, "total_bits": 1530595328, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9902411699295044, "total_bits": 1847937216, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9908908009529114, "total_bits": 1876887552, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9938972592353821, "total_bits": 2137606336, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9944445490837097, "total_bits": 2219328000, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9951662421226501, "total_bits": 2418557440, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9963693618774414, "total_bits": 2829599232, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.19.self_attn": [ { "accuracy": 0.9520105719566345, "total_bits": 89141248, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9538459181785583, "total_bits": 91697152, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9589889049530029, "total_bits": 95234560, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9681817293167114, "total_bits": 111748096, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9752156138420105, "total_bits": 132388864, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9764891266822815, "total_bits": 132455936, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.982934296131134, "total_bits": 169089024, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9846328496932983, "total_bits": 169221632, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9859267473220825, "total_bits": 170671104, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9866163730621338, "total_bits": 173039616, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9878649711608887, "total_bits": 174398976, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9886929392814636, "total_bits": 175225856, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9894260168075562, "total_bits": 178728960, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9904581904411316, "total_bits": 181067776, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9936633706092834, "total_bits": 219944960, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9939572811126709, "total_bits": 223010816, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9948374032974243, "total_bits": 252975104, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9951703548431396, "total_bits": 265314304, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9971715807914734, "total_bits": 336861184, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.19.block_sparse_moe": [ { "accuracy": 0.9208112955093384, "total_bits": 789481600, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9233543872833252, "total_bits": 817006720, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9358251094818115, "total_bits": 913102848, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9400017261505127, "total_bits": 1024514048, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9607851505279541, "total_bits": 1155352768, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9641975164413452, "total_bits": 1184303104, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9691514372825623, "total_bits": 1273467072, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9789302349090576, "total_bits": 1456041152, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9810194969177246, "total_bits": 1477511168, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.980036199092865, "total_bits": 1501644992, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9826532602310181, "total_bits": 1530595328, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9897034764289856, "total_bits": 1847937216, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9904369711875916, "total_bits": 1876887552, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9935228228569031, "total_bits": 2137606336, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9941257834434509, "total_bits": 2219328000, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9948739409446716, "total_bits": 2418557440, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9961323142051697, "total_bits": 2829599232, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.20.self_attn": [ { "accuracy": 0.9561905264854431, "total_bits": 89141248, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9581552743911743, "total_bits": 91697152, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9624968767166138, "total_bits": 95234560, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9708212018013, "total_bits": 111748096, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9770295023918152, "total_bits": 132388864, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9783568382263184, "total_bits": 132455936, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9833893179893494, "total_bits": 169089024, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9851038455963135, "total_bits": 169221632, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9865272045135498, "total_bits": 170671104, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9872649312019348, "total_bits": 173039616, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9891023635864258, "total_bits": 174398976, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9898583292961121, "total_bits": 175225856, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.990464985370636, "total_bits": 178728960, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9907716512680054, "total_bits": 181067776, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9941059350967407, "total_bits": 219944960, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9948921203613281, "total_bits": 223010816, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9951224327087402, "total_bits": 252975104, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9962242841720581, "total_bits": 265314304, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9979238510131836, "total_bits": 336861184, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.20.block_sparse_moe": [ { "accuracy": 0.9189481735229492, "total_bits": 789481600, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9213874340057373, "total_bits": 817006720, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.933992862701416, "total_bits": 913102848, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9379256367683411, "total_bits": 1024514048, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9600663781166077, "total_bits": 1155352768, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9634088277816772, "total_bits": 1184303104, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9684367179870605, "total_bits": 1273467072, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9790289402008057, "total_bits": 1456041152, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9810811281204224, "total_bits": 1477511168, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9798307418823242, "total_bits": 1501644992, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9824165105819702, "total_bits": 1530595328, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9897812008857727, "total_bits": 1847937216, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9906140565872192, "total_bits": 1876887552, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9939163327217102, "total_bits": 2137606336, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9943554997444153, "total_bits": 2219328000, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9951423406600952, "total_bits": 2418557440, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9966210722923279, "total_bits": 2829599232, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.21.self_attn": [ { "accuracy": 0.9602774381637573, "total_bits": 89141248, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9620184302330017, "total_bits": 91697152, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.966341495513916, "total_bits": 95234560, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9738659858703613, "total_bits": 111748096, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9799151420593262, "total_bits": 132388864, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9806138277053833, "total_bits": 132455936, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9856456518173218, "total_bits": 169089024, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9866120219230652, "total_bits": 169221632, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9878268241882324, "total_bits": 170671104, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9883459210395813, "total_bits": 173039616, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.989622950553894, "total_bits": 174398976, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9905336499214172, "total_bits": 175225856, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9908370971679688, "total_bits": 178728960, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9916588664054871, "total_bits": 181067776, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9945745468139648, "total_bits": 219944960, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.995370626449585, "total_bits": 223010816, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9954229593276978, "total_bits": 252975104, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9968876242637634, "total_bits": 265314304, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9978027939796448, "total_bits": 336861184, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.21.block_sparse_moe": [ { "accuracy": 0.9176525473594666, "total_bits": 789481600, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9200504422187805, "total_bits": 817006720, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9325759410858154, "total_bits": 913102848, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.936272144317627, "total_bits": 1024514048, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9594577550888062, "total_bits": 1155352768, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9627932906150818, "total_bits": 1184303104, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9677290916442871, "total_bits": 1273467072, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9789413213729858, "total_bits": 1456041152, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9809457659721375, "total_bits": 1477511168, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9795457720756531, "total_bits": 1501644992, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9821536540985107, "total_bits": 1530595328, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9896603226661682, "total_bits": 1847937216, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9905529022216797, "total_bits": 1876887552, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9939363598823547, "total_bits": 2137606336, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9943236112594604, "total_bits": 2219328000, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9950947165489197, "total_bits": 2418557440, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9966923594474792, "total_bits": 2829599232, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.22.self_attn": [ { "accuracy": 0.966745913028717, "total_bits": 89141248, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9683773517608643, "total_bits": 91697152, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9718221426010132, "total_bits": 95234560, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9772849082946777, "total_bits": 111748096, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9814184308052063, "total_bits": 132388864, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9827089905738831, "total_bits": 132455936, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9855894446372986, "total_bits": 169089024, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9872179627418518, "total_bits": 169221632, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9884589910507202, "total_bits": 170671104, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9887359142303467, "total_bits": 173039616, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.991218626499176, "total_bits": 174398976, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9917688965797424, "total_bits": 175225856, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9921588897705078, "total_bits": 178728960, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9927045702934265, "total_bits": 181067776, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9950355291366577, "total_bits": 219944960, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9957776069641113, "total_bits": 223010816, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9956390261650085, "total_bits": 252975104, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9972987174987793, "total_bits": 265314304, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9980953335762024, "total_bits": 336861184, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.22.block_sparse_moe": [ { "accuracy": 0.9167537689208984, "total_bits": 789481600, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9191410541534424, "total_bits": 817006720, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9313992261886597, "total_bits": 913102848, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9349713325500488, "total_bits": 1024514048, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9589354395866394, "total_bits": 1155352768, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9622740745544434, "total_bits": 1184303104, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9671310186386108, "total_bits": 1273467072, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9787421226501465, "total_bits": 1456041152, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9807668328285217, "total_bits": 1477511168, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9792634844779968, "total_bits": 1501644992, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9819305539131165, "total_bits": 1530595328, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9895734786987305, "total_bits": 1847937216, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9905009865760803, "total_bits": 1876887552, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9939671754837036, "total_bits": 2137606336, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9943435192108154, "total_bits": 2219328000, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9951132535934448, "total_bits": 2418557440, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9968230128288269, "total_bits": 2829599232, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.23.self_attn": [ { "accuracy": 0.9624196887016296, "total_bits": 89141248, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9639043211936951, "total_bits": 91697152, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9678547978401184, "total_bits": 95234560, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.973944902420044, "total_bits": 111748096, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9804320335388184, "total_bits": 132388864, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9815521240234375, "total_bits": 132455936, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.985444962978363, "total_bits": 169089024, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9869240522384644, "total_bits": 169221632, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9878811836242676, "total_bits": 170671104, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9881135821342468, "total_bits": 173039616, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9905280470848083, "total_bits": 174398976, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9913113713264465, "total_bits": 175225856, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.991692066192627, "total_bits": 178728960, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9920592308044434, "total_bits": 181067776, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9949548840522766, "total_bits": 219944960, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9955685138702393, "total_bits": 223010816, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9956687092781067, "total_bits": 252975104, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9968059659004211, "total_bits": 265314304, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9978312849998474, "total_bits": 336861184, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.23.block_sparse_moe": [ { "accuracy": 0.9143187999725342, "total_bits": 789481600, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9167527556419373, "total_bits": 817006720, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9290357828140259, "total_bits": 913102848, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9326325058937073, "total_bits": 1024514048, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9576324224472046, "total_bits": 1155352768, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9610482454299927, "total_bits": 1184303104, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9659402966499329, "total_bits": 1273467072, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9780495762825012, "total_bits": 1456041152, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9801176190376282, "total_bits": 1477511168, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9785990715026855, "total_bits": 1501644992, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9813228845596313, "total_bits": 1530595328, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9892236590385437, "total_bits": 1847937216, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9901890754699707, "total_bits": 1876887552, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.993776798248291, "total_bits": 2137606336, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9941657781600952, "total_bits": 2219328000, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9949439167976379, "total_bits": 2418557440, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9967749118804932, "total_bits": 2829599232, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.24.self_attn": [ { "accuracy": 0.9626137614250183, "total_bits": 89141248, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9638261795043945, "total_bits": 91697152, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9683545827865601, "total_bits": 95234560, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9743999242782593, "total_bits": 111748096, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9802573919296265, "total_bits": 132388864, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9811679720878601, "total_bits": 132455936, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9855732917785645, "total_bits": 169089024, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.986806333065033, "total_bits": 169221632, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9876412749290466, "total_bits": 170671104, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9885796904563904, "total_bits": 173039616, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9901564717292786, "total_bits": 174398976, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9910064339637756, "total_bits": 175225856, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9914023280143738, "total_bits": 178728960, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9923369288444519, "total_bits": 181067776, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9949977993965149, "total_bits": 219944960, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9954880475997925, "total_bits": 223010816, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9957490563392639, "total_bits": 252975104, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9966601729393005, "total_bits": 265314304, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9979127645492554, "total_bits": 336861184, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.24.block_sparse_moe": [ { "accuracy": 0.9123712182044983, "total_bits": 789481600, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9149122834205627, "total_bits": 817006720, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9271777272224426, "total_bits": 913102848, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9307891130447388, "total_bits": 1024514048, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9565628170967102, "total_bits": 1155352768, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9601279497146606, "total_bits": 1184303104, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9650297164916992, "total_bits": 1273467072, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9775056838989258, "total_bits": 1456041152, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9796233773231506, "total_bits": 1477511168, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9780490398406982, "total_bits": 1501644992, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9808886051177979, "total_bits": 1530595328, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9889493584632874, "total_bits": 1847937216, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9899862408638, "total_bits": 1876887552, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9936296939849854, "total_bits": 2137606336, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9940075874328613, "total_bits": 2219328000, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9947850108146667, "total_bits": 2418557440, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9966763854026794, "total_bits": 2829599232, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.25.self_attn": [ { "accuracy": 0.9615669250488281, "total_bits": 89141248, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9630138278007507, "total_bits": 91697152, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9674537777900696, "total_bits": 95234560, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9737302660942078, "total_bits": 111748096, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9798625111579895, "total_bits": 132388864, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9810425639152527, "total_bits": 132455936, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9852325320243835, "total_bits": 169089024, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9867474436759949, "total_bits": 169221632, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9881417751312256, "total_bits": 170671104, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9883435368537903, "total_bits": 173039616, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9904078841209412, "total_bits": 174398976, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.991101861000061, "total_bits": 175225856, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9917489886283875, "total_bits": 178728960, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9921980500221252, "total_bits": 181067776, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9950401782989502, "total_bits": 219944960, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9954831600189209, "total_bits": 223010816, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9958324432373047, "total_bits": 252975104, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9966421723365784, "total_bits": 265314304, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9983165860176086, "total_bits": 336861184, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.25.block_sparse_moe": [ { "accuracy": 0.910025417804718, "total_bits": 789481600, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9126579165458679, "total_bits": 817006720, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9248712062835693, "total_bits": 913102848, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9285444617271423, "total_bits": 1024514048, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9554370641708374, "total_bits": 1155352768, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9590091109275818, "total_bits": 1184303104, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9639074206352234, "total_bits": 1273467072, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.976810097694397, "total_bits": 1456041152, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9790138602256775, "total_bits": 1477511168, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9774888753890991, "total_bits": 1501644992, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9803431630134583, "total_bits": 1530595328, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9886630177497864, "total_bits": 1847937216, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9896842837333679, "total_bits": 1876887552, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9934572577476501, "total_bits": 2137606336, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9939171075820923, "total_bits": 2219328000, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9947075247764587, "total_bits": 2418557440, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9967034459114075, "total_bits": 2829599232, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.26.self_attn": [ { "accuracy": 0.9630843997001648, "total_bits": 89141248, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9647160768508911, "total_bits": 91697152, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9685962200164795, "total_bits": 95234560, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9752559661865234, "total_bits": 111748096, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9803222417831421, "total_bits": 132388864, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9812871813774109, "total_bits": 132455936, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9856693744659424, "total_bits": 169089024, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9869379997253418, "total_bits": 169221632, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9881822466850281, "total_bits": 170671104, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9886758327484131, "total_bits": 173039616, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9905336499214172, "total_bits": 174398976, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9911856055259705, "total_bits": 175225856, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9917016625404358, "total_bits": 178728960, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9922354221343994, "total_bits": 181067776, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.994892954826355, "total_bits": 219944960, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9958229660987854, "total_bits": 223010816, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9956833124160767, "total_bits": 252975104, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9971917271614075, "total_bits": 265314304, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9981370568275452, "total_bits": 336861184, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.26.block_sparse_moe": [ { "accuracy": 0.9075170159339905, "total_bits": 789481600, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9102041125297546, "total_bits": 817006720, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9224759936332703, "total_bits": 913102848, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9262111186981201, "total_bits": 1024514048, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9540256857872009, "total_bits": 1155352768, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9577633738517761, "total_bits": 1184303104, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9627200365066528, "total_bits": 1273467072, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9760566353797913, "total_bits": 1456041152, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9783250093460083, "total_bits": 1477511168, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9767502546310425, "total_bits": 1501644992, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9797237515449524, "total_bits": 1530595328, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9882933497428894, "total_bits": 1847937216, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9893499612808228, "total_bits": 1876887552, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9932170510292053, "total_bits": 2137606336, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9936389327049255, "total_bits": 2219328000, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9944278001785278, "total_bits": 2418557440, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9964339733123779, "total_bits": 2829599232, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.27.self_attn": [ { "accuracy": 0.9617936015129089, "total_bits": 89141248, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9633975028991699, "total_bits": 91697152, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9680721759796143, "total_bits": 95234560, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9750210046768188, "total_bits": 111748096, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9795563817024231, "total_bits": 132388864, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9806675910949707, "total_bits": 132455936, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.985278844833374, "total_bits": 169089024, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9868117570877075, "total_bits": 169221632, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9877163171768188, "total_bits": 170671104, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9879364371299744, "total_bits": 173039616, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9901169538497925, "total_bits": 174398976, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9907286167144775, "total_bits": 175225856, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9913630485534668, "total_bits": 178728960, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9918218851089478, "total_bits": 181067776, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9949977397918701, "total_bits": 219944960, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9952319860458374, "total_bits": 223010816, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9959171414375305, "total_bits": 252975104, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9963887929916382, "total_bits": 265314304, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9978615641593933, "total_bits": 336861184, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.27.block_sparse_moe": [ { "accuracy": 0.9048731923103333, "total_bits": 789481600, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9076915979385376, "total_bits": 817006720, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9202004671096802, "total_bits": 913102848, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9240267872810364, "total_bits": 1024514048, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9526344537734985, "total_bits": 1155352768, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9564889073371887, "total_bits": 1184303104, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9615671634674072, "total_bits": 1273467072, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9752382636070251, "total_bits": 1456041152, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9776009917259216, "total_bits": 1477511168, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9760178327560425, "total_bits": 1501644992, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9790842533111572, "total_bits": 1530595328, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9878911972045898, "total_bits": 1847937216, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9889597296714783, "total_bits": 1876887552, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9929184317588806, "total_bits": 2137606336, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9934102296829224, "total_bits": 2219328000, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9942166805267334, "total_bits": 2418557440, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9962531328201294, "total_bits": 2829599232, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.28.self_attn": [ { "accuracy": 0.9552621245384216, "total_bits": 89141248, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9571794271469116, "total_bits": 91697152, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9624937176704407, "total_bits": 95234560, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9703412055969238, "total_bits": 111748096, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9770101308822632, "total_bits": 132388864, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9779216647148132, "total_bits": 132455936, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9834486246109009, "total_bits": 169089024, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9847697615623474, "total_bits": 169221632, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9858402013778687, "total_bits": 170671104, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9867061376571655, "total_bits": 173039616, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9887192249298096, "total_bits": 174398976, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9894887804985046, "total_bits": 175225856, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.990312397480011, "total_bits": 178728960, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9906100630760193, "total_bits": 181067776, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9942272901535034, "total_bits": 219944960, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9949139952659607, "total_bits": 223010816, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.995221734046936, "total_bits": 252975104, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9964014887809753, "total_bits": 265314304, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9975267648696899, "total_bits": 336861184, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.28.block_sparse_moe": [ { "accuracy": 0.896470308303833, "total_bits": 789481600, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.8996202945709229, "total_bits": 817006720, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9137989282608032, "total_bits": 913102848, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9181637167930603, "total_bits": 1024514048, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9484235644340515, "total_bits": 1155352768, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9526388645172119, "total_bits": 1184303104, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9584135413169861, "total_bits": 1273467072, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9728785157203674, "total_bits": 1456041152, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9754793047904968, "total_bits": 1477511168, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9738481044769287, "total_bits": 1501644992, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9771828651428223, "total_bits": 1530595328, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9867613315582275, "total_bits": 1847937216, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9878963232040405, "total_bits": 1876887552, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.992160975933075, "total_bits": 2137606336, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9927269220352173, "total_bits": 2219328000, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9936355948448181, "total_bits": 2418557440, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.995759129524231, "total_bits": 2829599232, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.29.self_attn": [ { "accuracy": 0.9413414597511292, "total_bits": 89141248, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9437582492828369, "total_bits": 91697152, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9517562985420227, "total_bits": 95234560, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.962276816368103, "total_bits": 111748096, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9701902270317078, "total_bits": 132388864, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9708524346351624, "total_bits": 132455936, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9797618389129639, "total_bits": 169089024, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.980522096157074, "total_bits": 169221632, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9818471074104309, "total_bits": 170671104, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9830434322357178, "total_bits": 173039616, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9850006699562073, "total_bits": 174398976, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9858828186988831, "total_bits": 175225856, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9870431423187256, "total_bits": 178728960, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9878430366516113, "total_bits": 181067776, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9928874969482422, "total_bits": 219944960, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9932199120521545, "total_bits": 223010816, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9942694902420044, "total_bits": 252975104, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9952515959739685, "total_bits": 265314304, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9975231885910034, "total_bits": 336861184, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.29.block_sparse_moe": [ { "accuracy": 0.8947432041168213, "total_bits": 789481600, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.8979575037956238, "total_bits": 817006720, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9125786423683167, "total_bits": 913102848, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9170454144477844, "total_bits": 1024514048, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9473415017127991, "total_bits": 1155352768, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.95162034034729, "total_bits": 1184303104, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9576559066772461, "total_bits": 1273467072, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9721574187278748, "total_bits": 1456041152, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9747515916824341, "total_bits": 1477511168, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9731867909431458, "total_bits": 1501644992, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.976493239402771, "total_bits": 1530595328, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9863741397857666, "total_bits": 1847937216, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9871267080307007, "total_bits": 1876887552, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9917719960212708, "total_bits": 2137606336, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9923770427703857, "total_bits": 2219328000, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9932936429977417, "total_bits": 2418557440, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9953127503395081, "total_bits": 2829599232, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.30.self_attn": [ { "accuracy": 0.9392677545547485, "total_bits": 89141248, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.941457211971283, "total_bits": 91697152, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9494084119796753, "total_bits": 95234560, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9606375694274902, "total_bits": 111748096, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9682103991508484, "total_bits": 132388864, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9688822627067566, "total_bits": 132455936, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9767459034919739, "total_bits": 169089024, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9774910807609558, "total_bits": 169221632, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9801525473594666, "total_bits": 170671104, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9809303283691406, "total_bits": 173039616, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9836052060127258, "total_bits": 174398976, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.985017716884613, "total_bits": 175225856, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.985795795917511, "total_bits": 178728960, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9868364930152893, "total_bits": 181067776, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9918144941329956, "total_bits": 219944960, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9924085140228271, "total_bits": 223010816, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9930877089500427, "total_bits": 252975104, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9947546124458313, "total_bits": 265314304, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9962158799171448, "total_bits": 336861184, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.30.block_sparse_moe": [ { "accuracy": 0.8722478151321411, "total_bits": 789481600, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.8759552240371704, "total_bits": 817006720, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.8929286003112793, "total_bits": 913102848, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.8979101181030273, "total_bits": 1024514048, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9368959665298462, "total_bits": 1155352768, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9416853189468384, "total_bits": 1184303104, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9486551880836487, "total_bits": 1273467072, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9651004076004028, "total_bits": 1456041152, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.968464195728302, "total_bits": 1477511168, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9666235446929932, "total_bits": 1501644992, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.970598042011261, "total_bits": 1530595328, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9829833507537842, "total_bits": 1847937216, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9833393096923828, "total_bits": 1876887552, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.98890221118927, "total_bits": 2137606336, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9899843335151672, "total_bits": 2219328000, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9909735918045044, "total_bits": 2418557440, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9930081963539124, "total_bits": 2829599232, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.31.self_attn": [ { "accuracy": 0.9269536733627319, "total_bits": 89141248, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9314975738525391, "total_bits": 91697152, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9415920972824097, "total_bits": 95234560, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9586028456687927, "total_bits": 111748096, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9648119211196899, "total_bits": 132388864, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9658591151237488, "total_bits": 132455936, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.976678729057312, "total_bits": 169089024, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9779554009437561, "total_bits": 169221632, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9797047972679138, "total_bits": 170671104, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9804589152336121, "total_bits": 173039616, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9818172454833984, "total_bits": 174398976, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9827343225479126, "total_bits": 175225856, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9841172099113464, "total_bits": 178728960, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9850719571113586, "total_bits": 181067776, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9906227588653564, "total_bits": 219944960, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9899834394454956, "total_bits": 223010816, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9922504425048828, "total_bits": 252975104, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.991467297077179, "total_bits": 265314304, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9942261576652527, "total_bits": 336861184, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.31.block_sparse_moe": [ { "accuracy": 0.8593258261680603, "total_bits": 789481600, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.8635109663009644, "total_bits": 817006720, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.8822191953659058, "total_bits": 913102848, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.8874595761299133, "total_bits": 1024514048, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9296282529830933, "total_bits": 1155352768, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9360440969467163, "total_bits": 1184303104, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9434522986412048, "total_bits": 1273467072, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9627110958099365, "total_bits": 1456041152, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9665490984916687, "total_bits": 1477511168, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9639008045196533, "total_bits": 1501644992, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9688078761100769, "total_bits": 1530595328, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9813665151596069, "total_bits": 1847937216, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9828467965126038, "total_bits": 1876887552, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.988312304019928, "total_bits": 2137606336, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9893947839736938, "total_bits": 2219328000, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9905049204826355, "total_bits": 2418557440, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9930692911148071, "total_bits": 2829599232, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.norm.norm": null, "lm_head.linear": null }, "last_module_idx": 66 }