{ "measurement": { "model.layers.0.self_attn": [ { "accuracy": 0.8927343095603743, "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9063137365799201, "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9185273778673849, "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9436571605895695, "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9450778704332679, "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9484100851573443, "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9540501717281968, "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9579793115783679, "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9655700524974811, "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9669250372405115, "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9744767008447334, "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9772342247398276, "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9762421776785662, "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9793364945799112, "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9859041354962086, "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9892266060862887, "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9876006479424081, "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9940485582735977, "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9966068060480451, "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.0.block_sparse_moe": [ { "accuracy": 0.9030880277094088, "total_bits": 3163693568, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9086198159738591, "total_bits": 3273794048, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9171444256054728, "total_bits": 3658178560, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9191674182289525, "total_bits": 4103823360, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9665305773286443, "total_bits": 4627178240, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9688340402943524, "total_bits": 4742979584, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9727783123717496, "total_bits": 5099635456, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9835290242672751, "total_bits": 5829931776, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9850735402617016, "total_bits": 5915811840, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9838322821612421, "total_bits": 6012347136, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9856628818731559, "total_bits": 6128148480, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9917912122205292, "total_bits": 7397516032, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9929236106966671, "total_bits": 7513317376, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9956077111664375, "total_bits": 8556192512, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9957104809704775, "total_bits": 8883079168, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9964838445823836, "total_bits": 9679996928, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9987601683824323, "total_bits": 11324164096, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.1.self_attn": [ { "accuracy": 0.9048799762600347, "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9102665468266136, "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9232270200001567, "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9432037357044847, "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9447873356310945, "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9552908696626362, "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9545150789383211, "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9660995602607727, "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9704845690805661, "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9713042693114594, "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9764831385722286, "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9777006733378297, "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9784284559519667, "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9798988814612752, "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9865372913252366, "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9901911717300352, "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9886457672538725, "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9950138238797847, "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.996778014557142, "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.1.block_sparse_moe": [ { "accuracy": 0.9487595386607082, "total_bits": 3163693568, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9526886275332225, "total_bits": 3273794048, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9533808429382349, "total_bits": 3658178560, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9534995988207429, "total_bits": 4103823360, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9922328577438173, "total_bits": 4627178240, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9921259501961207, "total_bits": 4742979584, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9923325794393589, "total_bits": 5099635456, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.99772244385799, "total_bits": 5829931776, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9978273396876542, "total_bits": 5915811840, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9977205767921221, "total_bits": 6012347136, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9976507024471893, "total_bits": 6128148480, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9985919947962669, "total_bits": 7397516032, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9986946923217099, "total_bits": 7513317376, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9990662500985261, "total_bits": 8556192512, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9989959821886212, "total_bits": 8883079168, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9990341316825881, "total_bits": 9679996928, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9993346753403066, "total_bits": 11324164096, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.2.self_attn": [ { "accuracy": 0.9906863677560499, "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9910240213962664, "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9919754625298083, "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9929370776946215, "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9955075339469324, "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.99566349204887, "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.996363117350388, "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9965236906703611, "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9969393782237986, "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9971548830875882, "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9977320348185267, "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9979766794774485, "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9979892164548417, "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9982493751740549, "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9989067510775241, "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.999098613292996, "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9990470588581, "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.999361597703491, "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9997231424816786, "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.2.block_sparse_moe": [ { "accuracy": 0.9881758729829208, "total_bits": 3163693568, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9886304237144558, "total_bits": 3273794048, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9897464298410341, "total_bits": 3658178560, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.989985528426539, "total_bits": 4103823360, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9945088673174676, "total_bits": 4627178240, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9949362149774587, "total_bits": 4742979584, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9952734378442217, "total_bits": 5099635456, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9972353403266568, "total_bits": 5829931776, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9974536032363845, "total_bits": 5915811840, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9972370584253344, "total_bits": 6012347136, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9975816652034777, "total_bits": 6128148480, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9985952611298815, "total_bits": 7397516032, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9987981842875199, "total_bits": 7513317376, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9992458613472225, "total_bits": 8556192512, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9992795119986496, "total_bits": 8883079168, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9993403236198661, "total_bits": 9679996928, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9998018933723583, "total_bits": 11324164096, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.3.self_attn": [ { "accuracy": 0.9812224179898438, "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9817026160338795, "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9825156987277105, "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9843439636997094, "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9919869029575861, "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9924260417342579, "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9933289336285701, "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9939231877215207, "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9943950836742786, "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9941157641281423, "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9963558812558307, "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9968985555430041, "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9968296592123806, "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9972663742373697, "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9981658644759782, "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9983448157987656, "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9984484556235226, "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9987140507406653, "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.99955834257516, "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.3.block_sparse_moe": [ { "accuracy": 0.9855747392131505, "total_bits": 3163693568, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9860868274699897, "total_bits": 3273794048, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9875211122470271, "total_bits": 3658178560, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9878317521223309, "total_bits": 4103823360, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9932137164225998, "total_bits": 4627178240, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9937485363334417, "total_bits": 4742979584, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9941694032106745, "total_bits": 5099635456, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9965869438754836, "total_bits": 5829931776, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9968553682587011, "total_bits": 5915811840, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.996584249225347, "total_bits": 6012347136, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9970060479275784, "total_bits": 6128148480, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9982638223491928, "total_bits": 7397516032, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9985119822453462, "total_bits": 7513317376, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9990701406013135, "total_bits": 8556192512, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9991102746035291, "total_bits": 8883079168, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9991850875284352, "total_bits": 9679996928, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9997553485367174, "total_bits": 11324164096, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.4.self_attn": [ { "accuracy": 0.9810080849075395, "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9815884371974358, "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9824709834678, "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9839131255928231, "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9892953082208374, "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.989431134143256, "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.991023167048728, "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9914673456162411, "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9928065987524429, "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9933526435327765, "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9948833246629587, "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9955151213369774, "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9956823174878465, "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9969079271663519, "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9980814889483889, "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9982852476364075, "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9983839193668437, "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9988428183668906, "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9995884063767, "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.4.block_sparse_moe": [ { "accuracy": 0.9811534073301836, "total_bits": 3163693568, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9819072693047163, "total_bits": 3273794048, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9838461965861681, "total_bits": 3658178560, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9842572564161137, "total_bits": 4103823360, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9911742958999974, "total_bits": 4627178240, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9918771301285902, "total_bits": 4742979584, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9924346514499599, "total_bits": 5099635456, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9955200622758926, "total_bits": 5829931776, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9958739824145787, "total_bits": 5915811840, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9955603916191889, "total_bits": 6012347136, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9961121706414575, "total_bits": 6128148480, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9977467899995023, "total_bits": 7397516032, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9980687630733499, "total_bits": 7513317376, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9987795007611183, "total_bits": 8556192512, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9988484007802694, "total_bits": 8883079168, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9989510454638788, "total_bits": 9679996928, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9996806427041424, "total_bits": 11324164096, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.5.self_attn": [ { "accuracy": 0.9789889930551382, "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9798625956387504, "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9813149288590801, "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9840390488848483, "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9887679906709022, "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9891786046424195, "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9911824227800887, "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.991582257158466, "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9929442881042824, "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9940935697369767, "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9934911736973414, "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9945823101044976, "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9948801190322755, "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9959352082009182, "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9975595801278312, "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9976338190608658, "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9979784976948347, "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9981264154622821, "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9994798940705981, "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.5.block_sparse_moe": [ { "accuracy": 0.978664982402207, "total_bits": 3163693568, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9794988515541742, "total_bits": 3273794048, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9818984559541079, "total_bits": 3658178560, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9823995209053943, "total_bits": 4103823360, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9900134621849773, "total_bits": 4627178240, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9907987947992393, "total_bits": 4742979584, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.991492827729273, "total_bits": 5099635456, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9949344000350194, "total_bits": 5829931776, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9953306115195645, "total_bits": 5915811840, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9949753440205792, "total_bits": 6012347136, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.995594392873739, "total_bits": 6128148480, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9974528453465993, "total_bits": 7397516032, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9978119775861207, "total_bits": 7513317376, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9986229499075929, "total_bits": 8556192512, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9986978719979035, "total_bits": 8883079168, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9988255320380344, "total_bits": 9679996928, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9996380922970466, "total_bits": 11324164096, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.6.self_attn": [ { "accuracy": 0.9801530870608985, "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9810805393284873, "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9827629441493436, "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.985897419729123, "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9893713921529094, "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9893691590742061, "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9917782015828905, "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9918578816087622, "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9929375834150338, "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9908309326458135, "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.994881523913998, "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9953351687688968, "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9955153813659164, "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9959387625199988, "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9975020906422287, "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9979940563391306, "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9980190419224336, "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9985896650851875, "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9994310817197211, "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.6.block_sparse_moe": [ { "accuracy": 0.9776027040850175, "total_bits": 3163693568, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9784829923147825, "total_bits": 3273794048, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.981191170843024, "total_bits": 3658178560, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9817737596795747, "total_bits": 4103823360, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9895515017162421, "total_bits": 4627178240, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.99036740070503, "total_bits": 4742979584, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9911944134356944, "total_bits": 5099635456, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9946750295471007, "total_bits": 5829931776, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9950973538951459, "total_bits": 5915811840, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9947444905441156, "total_bits": 6012347136, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9953915867096695, "total_bits": 6128148480, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9973329473746402, "total_bits": 7397516032, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9977101906306894, "total_bits": 7513317376, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9985530056399415, "total_bits": 8556192512, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9986389000949106, "total_bits": 8883079168, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.998797874572981, "total_bits": 9679996928, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.99961844305998, "total_bits": 11324164096, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.7.self_attn": [ { "accuracy": 0.9743354919023419, "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9765030686302405, "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9780584038480332, "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9811292772524451, "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9886405154768574, "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9887926166288947, "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9910441720122961, "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9912087257191735, "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9923568385481638, "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9928741868168703, "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.994693204086568, "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9950728735849751, "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.995316978354056, "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9959371596770851, "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9974497343217464, "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9978319646998945, "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9979981508314315, "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9984374974462155, "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.999455043678417, "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.7.block_sparse_moe": [ { "accuracy": 0.9761692356612337, "total_bits": 3163693568, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9771110268407747, "total_bits": 3273794048, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9799116757257205, "total_bits": 3658178560, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9805126848483556, "total_bits": 4103823360, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9888679240830243, "total_bits": 4627178240, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9897484848962018, "total_bits": 4742979584, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9905926439494482, "total_bits": 5099635456, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9943489707197601, "total_bits": 5829931776, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9947966575610304, "total_bits": 5915811840, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9943843715530085, "total_bits": 6012347136, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9950898664721631, "total_bits": 6128148480, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9971432989267143, "total_bits": 7397516032, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9975574453189773, "total_bits": 7513317376, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.998454818575594, "total_bits": 8556192512, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9985353879869523, "total_bits": 8883079168, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.998696172758508, "total_bits": 9679996928, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9995920681386977, "total_bits": 11324164096, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.8.self_attn": [ { "accuracy": 0.974129844603962, "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9751904495060444, "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.975648588304849, "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9789879968340852, "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9868776146567574, "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9868660909123719, "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9898797866128581, "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9898726452191017, "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9917887980240936, "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9920861386007777, "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9938687135244867, "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9947838878479639, "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9950133602894646, "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9952706728240868, "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9972465989811002, "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9976793081119755, "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9979576880179689, "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9984562546137328, "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9994688858170871, "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.8.block_sparse_moe": [ { "accuracy": 0.9755102310721812, "total_bits": 3163693568, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9764318370113247, "total_bits": 3273794048, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9794021783592669, "total_bits": 3658178560, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9800484928694603, "total_bits": 4103823360, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9885358208741405, "total_bits": 4627178240, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9894486446987445, "total_bits": 4742979584, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9903730194231397, "total_bits": 5099635456, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9941705012767527, "total_bits": 5829931776, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9946439135942216, "total_bits": 5915811840, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9942233957178694, "total_bits": 6012347136, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9949401448186683, "total_bits": 6128148480, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9970614799347363, "total_bits": 7397516032, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9974818280101508, "total_bits": 7513317376, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9984098398157297, "total_bits": 8556192512, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9984939534670526, "total_bits": 8883079168, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9986763424377912, "total_bits": 9679996928, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9995763799616773, "total_bits": 11324164096, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.9.self_attn": [ { "accuracy": 0.9756803335621953, "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9761956865948281, "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9778441340221387, "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9808948237056795, "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9885536599659214, "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9886777684043505, "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.991538453553068, "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9917482153733114, "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9932947080999025, "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9935158609143017, "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9944790795102323, "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9947784310574398, "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9950758834173413, "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9952556556580883, "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9973475005122294, "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9976480028585923, "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9980240286554602, "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.998284305075159, "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.999437510385178, "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.9.block_sparse_moe": [ { "accuracy": 0.9740286844439412, "total_bits": 3163693568, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.975021544990963, "total_bits": 3273794048, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9781646162859703, "total_bits": 3658178560, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9788647816565476, "total_bits": 4103823360, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9877682545859563, "total_bits": 4627178240, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.98875437086252, "total_bits": 4742979584, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.989763476077075, "total_bits": 5099635456, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9937655327866148, "total_bits": 5829931776, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9942622609853157, "total_bits": 5915811840, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9938142160934053, "total_bits": 6012347136, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9945949230857781, "total_bits": 6128148480, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9968502688295159, "total_bits": 7397516032, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9973070924109044, "total_bits": 7513317376, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9982895516839466, "total_bits": 8556192512, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9983852999346134, "total_bits": 8883079168, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9985830129896242, "total_bits": 9679996928, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.999542831842279, "total_bits": 11324164096, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.10.self_attn": [ { "accuracy": 0.9726283873774504, "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9744550657801723, "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9759016623720527, "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9797264493903831, "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9873161876672193, "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.987692568794285, "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9909440006717647, "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9914310881010208, "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9921085494080264, "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9921999614391672, "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.993920676802334, "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9941370894789303, "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9944584813385614, "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9951674014660775, "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9970085442335787, "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9975362319045847, "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9977708517935274, "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9984206288783369, "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9994046645588242, "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.10.block_sparse_moe": [ { "accuracy": 0.9741594452214869, "total_bits": 3163693568, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9751966971609938, "total_bits": 3273794048, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9784920966056617, "total_bits": 3658178560, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9792085844827326, "total_bits": 4103823360, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9879032868744904, "total_bits": 4627178240, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.988877116898565, "total_bits": 4742979584, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9898996584264463, "total_bits": 5099635456, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9937895100906884, "total_bits": 5829931776, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9942993475369325, "total_bits": 5915811840, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9938960598757196, "total_bits": 6012347136, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.994663145331862, "total_bits": 6128148480, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9968940198274427, "total_bits": 7397516032, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9973411147151828, "total_bits": 7513317376, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9982989382911719, "total_bits": 8556192512, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9984105051905652, "total_bits": 8883079168, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9986121274720828, "total_bits": 9679996928, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9995460096696135, "total_bits": 11324164096, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.11.self_attn": [ { "accuracy": 0.9717566045981488, "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9727210995594138, "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9739466825696198, "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.977895011880288, "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9850950814342421, "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9857169677209305, "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9887099445454384, "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9896553056069503, "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9894165103078673, "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9918764737606245, "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9931830030522848, "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9936998801820568, "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9940053753334245, "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9942942600403177, "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9966107784941989, "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9973084222843969, "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9973281711266425, "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9982206545813058, "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9992012991799704, "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.11.block_sparse_moe": [ { "accuracy": 0.973086140951828, "total_bits": 3163693568, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9741813558594961, "total_bits": 3273794048, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9776249818111721, "total_bits": 3658178560, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9784137030181131, "total_bits": 4103823360, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9872889469720816, "total_bits": 4627178240, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.988322922273686, "total_bits": 4742979584, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9894400855869447, "total_bits": 5099635456, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9934101234508776, "total_bits": 5829931776, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9939674752470302, "total_bits": 5915811840, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9935808113506553, "total_bits": 6012347136, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9943831744358728, "total_bits": 6128148480, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9967321758219776, "total_bits": 7397516032, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9972006141784062, "total_bits": 7513317376, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9981959326204991, "total_bits": 8556192512, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9983325180763035, "total_bits": 8883079168, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9985584396697385, "total_bits": 9679996928, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9995112105228289, "total_bits": 11324164096, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.12.self_attn": [ { "accuracy": 0.9680189107985873, "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9694653479382396, "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9716157833310334, "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9758653836324811, "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9831658846845752, "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9833465666804266, "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9869960105340732, "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9871167150620175, "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9885900349013115, "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9891478124466774, "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9915508438195837, "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9923678095771098, "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9926356300711632, "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9934991380984062, "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9960954873638815, "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9968042824552149, "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9969077858105792, "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9979333787280331, "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.999052864353871, "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.12.block_sparse_moe": [ { "accuracy": 0.9712577095549357, "total_bits": 3163693568, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9724118628686196, "total_bits": 3273794048, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9759485300532297, "total_bits": 3658178560, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9767520446330309, "total_bits": 4103823360, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9863755776742963, "total_bits": 4627178240, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9875114198136878, "total_bits": 4742979584, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9886423577799609, "total_bits": 5099635456, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9929941416178879, "total_bits": 5829931776, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9935722657307786, "total_bits": 5915811840, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9930939978895414, "total_bits": 6012347136, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9939774745879205, "total_bits": 6128148480, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9964668099633663, "total_bits": 7397516032, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9969927333908057, "total_bits": 7513317376, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9980672756945225, "total_bits": 8556192512, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9981849942154153, "total_bits": 8883079168, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9984085122797344, "total_bits": 9679996928, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9994459930121115, "total_bits": 11324164096, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.13.self_attn": [ { "accuracy": 0.9692998341725845, "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9710053508905204, "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9735228482045626, "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9794315301175964, "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9850372978437104, "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9853623099625111, "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9902039017822397, "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9906474259240847, "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9915249138384273, "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9918947881577831, "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9926776826063073, "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9929902172696433, "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9934642378878045, "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9938343888712361, "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9964044266809268, "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9969350891249058, "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.997412791990332, "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9980641188387955, "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9992658774089681, "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.13.block_sparse_moe": [ { "accuracy": 0.9682975168290892, "total_bits": 3163693568, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9695394376390859, "total_bits": 3273794048, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9735709570936466, "total_bits": 3658178560, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9745098202930469, "total_bits": 4103823360, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9849135719337746, "total_bits": 4627178240, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9861536747285802, "total_bits": 4742979584, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9874562445880943, "total_bits": 5099635456, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9921922211314699, "total_bits": 5829931776, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.992854455338889, "total_bits": 5915811840, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9923761994700486, "total_bits": 6012347136, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9933403497194185, "total_bits": 6128148480, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9961217650347144, "total_bits": 7397516032, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9966817953636086, "total_bits": 7513317376, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9978634714712634, "total_bits": 8556192512, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9980209783871511, "total_bits": 8883079168, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9982761447471077, "total_bits": 9679996928, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9994266751164105, "total_bits": 11324164096, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.14.self_attn": [ { "accuracy": 0.9633467096443239, "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9653954030455727, "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9683693779730483, "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9745786599324722, "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9820675445034316, "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9824700589340768, "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9877681247889996, "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9882694032092235, "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9895028126867194, "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9894669568166137, "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9911685550193253, "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9918634261519305, "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9923824109382143, "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9930733214990285, "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9956155345334035, "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9963796787932026, "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9966879397765488, "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.997632738993209, "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9990332246065678, "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.14.block_sparse_moe": [ { "accuracy": 0.9665373434361658, "total_bits": 3163693568, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9678925892436191, "total_bits": 3273794048, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9720675915871796, "total_bits": 3658178560, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.973059084756594, "total_bits": 4103823360, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9840946414654976, "total_bits": 4627178240, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9854350502749807, "total_bits": 4742979584, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9867914475157464, "total_bits": 5099635456, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9916828189927497, "total_bits": 5829931776, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9924078687020627, "total_bits": 5915811840, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9919491184042081, "total_bits": 6012347136, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9929801452120668, "total_bits": 6128148480, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9958941983426676, "total_bits": 7397516032, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.99649645665685, "total_bits": 7513317376, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9977084608920115, "total_bits": 8556192512, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9979095425775373, "total_bits": 8883079168, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9981810129278241, "total_bits": 9679996928, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9993750999595872, "total_bits": 11324164096, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.15.self_attn": [ { "accuracy": 0.9587114276854616, "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9605471293785071, "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9638095209865194, "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9695593713733711, "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9796576387690086, "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9800980393039552, "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9850304587499091, "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9856083616614342, "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9876146279030332, "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9883697819650957, "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9902390172263902, "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9906882679727125, "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.991039695291731, "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9919426626721887, "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9948660507920737, "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9960061138201701, "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9958490656051588, "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9976818452429909, "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.998925783990049, "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.15.block_sparse_moe": [ { "accuracy": 0.9646718976529021, "total_bits": 3163693568, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9660620195300955, "total_bits": 3273794048, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9704071753903439, "total_bits": 3658178560, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9714194625420006, "total_bits": 4103823360, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9832061792870885, "total_bits": 4627178240, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9845649784822997, "total_bits": 4742979584, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9859563885256648, "total_bits": 5099635456, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9912993187378896, "total_bits": 5829931776, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9920238156459833, "total_bits": 5915811840, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.991518495359311, "total_bits": 6012347136, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9925791419258243, "total_bits": 6128148480, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9956849255280471, "total_bits": 7397516032, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9963014293094411, "total_bits": 7513317376, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9976249200255168, "total_bits": 8556192512, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9977984775507235, "total_bits": 8883079168, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9980662064136643, "total_bits": 9679996928, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9993557084970618, "total_bits": 11324164096, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.16.self_attn": [ { "accuracy": 0.9606716368151338, "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9617921094361105, "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9652378063060736, "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9704111903709801, "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9792717941301433, "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9795874765534934, "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9843883647240306, "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9847027071702638, "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.986352728067064, "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9877746126026308, "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9909458550330448, "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9912068257108331, "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9918774995266607, "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9927592736815936, "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9955148429634344, "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9963201990865759, "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9965404827196739, "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9976121238310283, "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9990867300611602, "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.16.block_sparse_moe": [ { "accuracy": 0.9622118808329105, "total_bits": 3163693568, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9637770191031068, "total_bits": 3273794048, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9682961044538962, "total_bits": 3658178560, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9693962392446241, "total_bits": 4103823360, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9820274702322326, "total_bits": 4627178240, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9835335000565177, "total_bits": 4742979584, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9850094166927432, "total_bits": 5099635456, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9906069554241472, "total_bits": 5829931776, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9914062109922892, "total_bits": 5915811840, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9909132183039266, "total_bits": 6012347136, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9920789876854733, "total_bits": 6128148480, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9953723748361594, "total_bits": 7397516032, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9960513842096063, "total_bits": 7513317376, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9974232866293996, "total_bits": 8556192512, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9976463066352728, "total_bits": 8883079168, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9979339581240263, "total_bits": 9679996928, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9992979122632134, "total_bits": 11324164096, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.17.self_attn": [ { "accuracy": 0.9607790564431956, "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9627957389150795, "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9659502725852163, "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9736579456121514, "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9801250652557141, "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9804054461419582, "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9860055582891953, "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.986409543229169, "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9878590142256335, "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9880224535192706, "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9897606408125476, "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9907773207431954, "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9910461907275021, "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.991932264896796, "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9947711139745814, "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9959455007123516, "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.99595631803631, "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9975495182320868, "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9989297412501305, "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.17.block_sparse_moe": [ { "accuracy": 0.9578969346261338, "total_bits": 3163693568, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9596339375957063, "total_bits": 3273794048, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.964590854060493, "total_bits": 3658178560, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9658363236996689, "total_bits": 4103823360, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9800180372242865, "total_bits": 4627178240, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9816535127006079, "total_bits": 4742979584, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9833148544360149, "total_bits": 5099635456, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9895463013090193, "total_bits": 5829931776, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9904437520492234, "total_bits": 5915811840, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9899036647800944, "total_bits": 6012347136, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9911828902304957, "total_bits": 6128148480, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9948658752990397, "total_bits": 7397516032, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9956067858239341, "total_bits": 7513317376, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9971404651449504, "total_bits": 8556192512, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9973921147849116, "total_bits": 8883079168, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9977162214130849, "total_bits": 9679996928, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9992223590869751, "total_bits": 11324164096, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.18.self_attn": [ { "accuracy": 0.9596748630467214, "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9615935769520307, "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.964860025597246, "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9710447392181346, "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9790981908475882, "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9795970063852636, "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9847660486243273, "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9853564892944536, "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9868900095553774, "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9880461866330159, "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9898543627932668, "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9905718159371693, "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.990942711696813, "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9920312341939854, "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9946737280728197, "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9959586909481961, "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9956419776242814, "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9974716893385017, "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.998742044428486, "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.18.block_sparse_moe": [ { "accuracy": 0.9552191208655897, "total_bits": 3163693568, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9571064734145215, "total_bits": 3273794048, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9622641370484704, "total_bits": 3658178560, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9636346552717059, "total_bits": 4103823360, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9787322508759404, "total_bits": 4627178240, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9804641974501704, "total_bits": 4742979584, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9822374132804965, "total_bits": 5099635456, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9887667938478684, "total_bits": 5829931776, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9897526908714912, "total_bits": 5915811840, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9892639164126625, "total_bits": 6012347136, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9906200714360335, "total_bits": 6128148480, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9945413924778175, "total_bits": 7397516032, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9953271416015923, "total_bits": 7513317376, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9969349465651536, "total_bits": 8556192512, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9972343160648292, "total_bits": 8883079168, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9975815885332658, "total_bits": 9679996928, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9991587462075251, "total_bits": 11324164096, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.19.self_attn": [ { "accuracy": 0.9588423403666207, "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9601413854642918, "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9618916485066477, "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9670033885264083, "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9772919533088019, "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9782077552456605, "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9824908911495617, "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9835329576836604, "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9880808498220224, "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.989287146685743, "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9903323757462204, "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9909658408233601, "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9913004636764526, "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9914173483113317, "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9951461570729551, "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9961254622934288, "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9962832302124681, "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9975838387188943, "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9989784655136693, "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.19.block_sparse_moe": [ { "accuracy": 0.9544335399803362, "total_bits": 3163693568, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9563157678042588, "total_bits": 3273794048, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9613487750880028, "total_bits": 3658178560, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9626799009152149, "total_bits": 4103823360, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.978380221481386, "total_bits": 4627178240, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9801134047049441, "total_bits": 4742979584, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9818710292640486, "total_bits": 5099635456, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9886346916226965, "total_bits": 5829931776, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9896189408180746, "total_bits": 5915811840, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9890772709622979, "total_bits": 6012347136, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9904365326572013, "total_bits": 6128148480, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9944376763642618, "total_bits": 7397516032, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.995235378265773, "total_bits": 7513317376, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9968947740271688, "total_bits": 8556192512, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9971765044856032, "total_bits": 8883079168, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9975161114523775, "total_bits": 9679996928, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9991449049955822, "total_bits": 11324164096, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.20.self_attn": [ { "accuracy": 0.9640616050088092, "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9657064992934465, "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9685631630648124, "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9749877698915569, "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9818207229438581, "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9821445581650263, "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9872186150597898, "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9875737794331814, "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9894093207916931, "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9903805290201777, "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9909965223387668, "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9919080013458274, "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9922644109220097, "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9929319315737015, "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.99553295314361, "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9963776847519177, "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9966200532906345, "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9977985789423416, "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9990143536828997, "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.20.block_sparse_moe": [ { "accuracy": 0.9561803080141544, "total_bits": 3163693568, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9577519181919725, "total_bits": 3273794048, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9622882595775943, "total_bits": 3658178560, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9634194696616185, "total_bits": 4103823360, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9789235763447849, "total_bits": 4627178240, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9806274152115771, "total_bits": 4742979584, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9821445257648042, "total_bits": 5099635456, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9891164411643618, "total_bits": 5829931776, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9900322366309794, "total_bits": 5915811840, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9893658661136502, "total_bits": 6012347136, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.990697572595979, "total_bits": 6128148480, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9946068038330659, "total_bits": 7397516032, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9953704814573652, "total_bits": 7513317376, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9970464285977773, "total_bits": 8556192512, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9972557131397096, "total_bits": 8883079168, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9975379699240684, "total_bits": 9679996928, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9991966259181745, "total_bits": 11324164096, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.21.self_attn": [ { "accuracy": 0.9696155017928073, "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9712208481015343, "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9741388615220785, "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9795908092178012, "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9844273092519296, "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9848122063436007, "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9889196790754795, "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9893558675080145, "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9908263207609324, "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9914247376942321, "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.99247828493581, "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9930368867996884, "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9933675836134506, "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9939170256747227, "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9962256691345063, "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9969258344070496, "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9971073732931951, "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9980748979632106, "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9991852816770246, "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.21.block_sparse_moe": [ { "accuracy": 0.9577852384628434, "total_bits": 3163693568, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9592334671240104, "total_bits": 3273794048, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9633605674301323, "total_bits": 3658178560, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9643668685304492, "total_bits": 4103823360, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9795976956620028, "total_bits": 4627178240, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9812483163550496, "total_bits": 4742979584, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9825932339912182, "total_bits": 5099635456, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9895202788328262, "total_bits": 5829931776, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9903989588645729, "total_bits": 5915811840, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9896923034746004, "total_bits": 6012347136, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9909720737732163, "total_bits": 6128148480, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9947608082653269, "total_bits": 7397516032, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9955082541258123, "total_bits": 7513317376, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9971504969840967, "total_bits": 8556192512, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9973262627180176, "total_bits": 8883079168, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9975698868817601, "total_bits": 9679996928, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9992269913077747, "total_bits": 11324164096, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.22.self_attn": [ { "accuracy": 0.9753486117053973, "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9768567697978333, "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9791314650425001, "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9827873340660804, "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9873412428913932, "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9877031482265968, "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9903251762795997, "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9911880215482884, "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9925592173086969, "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9932042541423518, "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9936697448505775, "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9942019189168748, "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9945677322531609, "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9952785028879972, "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9968074349269859, "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9975870187136981, "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9974081055628822, "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9985546128162625, "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9992676115637694, "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.22.block_sparse_moe": [ { "accuracy": 0.9570417072820037, "total_bits": 3163693568, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9584607028059269, "total_bits": 3273794048, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.962516998676093, "total_bits": 3658178560, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9634930731630639, "total_bits": 4103823360, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9791327612474561, "total_bits": 4627178240, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9807871795308433, "total_bits": 4742979584, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9820720702409744, "total_bits": 5099635456, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9893011448806838, "total_bits": 5829931776, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9901960452675427, "total_bits": 5915811840, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9894707738666942, "total_bits": 6012347136, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.990759672866644, "total_bits": 6128148480, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9946612079848388, "total_bits": 7397516032, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.995406142288917, "total_bits": 7513317376, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9971074650774857, "total_bits": 8556192512, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9972797895622391, "total_bits": 8883079168, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9975091053148437, "total_bits": 9679996928, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9992225442669893, "total_bits": 11324164096, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.23.self_attn": [ { "accuracy": 0.973126635347542, "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9750476955975357, "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9773407544156438, "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9812493738963416, "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9869353669254404, "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9873303064684334, "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9905402837811332, "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.991042913501396, "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9921958757270324, "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9925724887289107, "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9935792839938873, "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9940602396320748, "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9943757364771476, "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9949277933245819, "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.996816597229458, "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9974483139935488, "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9974816333576056, "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9984432052404276, "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9992823415055969, "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.23.block_sparse_moe": [ { "accuracy": 0.9563436834631782, "total_bits": 3163693568, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9577935140972075, "total_bits": 3273794048, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9617662726852455, "total_bits": 3658178560, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9627242050084629, "total_bits": 4103823360, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9787209732948166, "total_bits": 4627178240, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9804131655806774, "total_bits": 4742979584, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9816653300566893, "total_bits": 5099635456, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9890774948228347, "total_bits": 5829931776, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9899931440963164, "total_bits": 5915811840, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9892564179424784, "total_bits": 6012347136, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.990574057311996, "total_bits": 6128148480, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9945483153538877, "total_bits": 7397516032, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9953126670229003, "total_bits": 7513317376, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9970433303326565, "total_bits": 8556192512, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9972226192537499, "total_bits": 8883079168, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9974451572756822, "total_bits": 9679996928, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9992063414262559, "total_bits": 11324164096, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.24.self_attn": [ { "accuracy": 0.9746944976872519, "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9757393622084668, "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9780531992253504, "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9814517904367102, "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9868966627277826, "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9872016169522938, "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9906645135120734, "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9909840996720289, "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9923150419306598, "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9929738595934683, "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9935429079065982, "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9941605311622354, "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9945383529717985, "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9951209723091635, "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9969038679264486, "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9975153433011943, "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9975208674532998, "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9984810925640264, "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9992873490371398, "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.24.block_sparse_moe": [ { "accuracy": 0.9554924632569677, "total_bits": 3163693568, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9569464004353473, "total_bits": 3273794048, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9608145434605448, "total_bits": 3658178560, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9617597221543914, "total_bits": 4103823360, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9782461748507462, "total_bits": 4627178240, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9799975086199609, "total_bits": 4742979584, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9811969918542003, "total_bits": 5099635456, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9888361587287172, "total_bits": 5829931776, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9897834698326493, "total_bits": 5915811840, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9890115451508839, "total_bits": 6012347136, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9903701259999683, "total_bits": 6128148480, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9944192389467437, "total_bits": 7397516032, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9952144394015991, "total_bits": 7513317376, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9969750265713389, "total_bits": 8556192512, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9971565713395217, "total_bits": 8883079168, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9973664795673501, "total_bits": 9679996928, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9991875710302817, "total_bits": 11324164096, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.25.self_attn": [ { "accuracy": 0.9758843368801632, "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9769561434850881, "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9796612796030546, "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9831484090046663, "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9877177321499115, "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9883544292104872, "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.991460518813447, "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9922100345996258, "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9927998690277731, "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9934130144599629, "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9939984854702887, "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9942045493639613, "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.994623773461698, "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9953945898818538, "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9970037716832992, "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9976607161316726, "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.997618391264328, "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9985596225535693, "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9993525538930522, "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.25.block_sparse_moe": [ { "accuracy": 0.9540335359541994, "total_bits": 3163693568, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9555699380212709, "total_bits": 3273794048, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9594872100768905, "total_bits": 3658178560, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9604622372671178, "total_bits": 4103823360, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9775305498685491, "total_bits": 4627178240, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.979331404853024, "total_bits": 4742979584, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9805504413517682, "total_bits": 5099635456, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9884377423379767, "total_bits": 5829931776, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9894370545486086, "total_bits": 5915811840, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9886619967996682, "total_bits": 6012347136, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9900582717045358, "total_bits": 6128148480, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9942427417018304, "total_bits": 7397516032, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9950591916782096, "total_bits": 7513317376, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9968749187200477, "total_bits": 8556192512, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9970733656449929, "total_bits": 8883079168, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9972858475950105, "total_bits": 9679996928, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9991612442352465, "total_bits": 11324164096, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.26.self_attn": [ { "accuracy": 0.9749782369717171, "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9763102332424176, "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9785684813794336, "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9823501078706038, "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9871359168993015, "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9873888979322816, "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9908604231712065, "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9911857834821077, "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9921427873012266, "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9927242974946765, "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9936705039274928, "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9943700324940054, "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9945975528343728, "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.995112427608355, "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9968170980114097, "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9975683828141835, "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9974557605547536, "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.998529096734465, "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9992965909462471, "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.26.block_sparse_moe": [ { "accuracy": 0.9523639622095385, "total_bits": 3163693568, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.953927107351391, "total_bits": 3273794048, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9578789869617474, "total_bits": 3658178560, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9588678179210738, "total_bits": 4103823360, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.976687543437277, "total_bits": 4627178240, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9785591488900153, "total_bits": 4742979584, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9797876878690562, "total_bits": 5099635456, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9880078642285968, "total_bits": 5829931776, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9890321862246645, "total_bits": 5915811840, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9882386439528904, "total_bits": 6012347136, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9896810436131138, "total_bits": 6128148480, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9940374507078606, "total_bits": 7397516032, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9948717636472889, "total_bits": 7513317376, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9967612432021844, "total_bits": 8556192512, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9969669987790679, "total_bits": 8883079168, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9971829989900518, "total_bits": 9679996928, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9991344218663136, "total_bits": 11324164096, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.27.self_attn": [ { "accuracy": 0.9727881259628033, "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.974121087279759, "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9777420671950829, "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9814362788180772, "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.986713989931894, "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9871265264227986, "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9905111081898212, "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9910501910030449, "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9918360161398979, "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9920424809679389, "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9933784815090659, "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.994006330465996, "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.994244771127246, "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9944441623152479, "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9967501370089227, "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9973248382356312, "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9974910165244517, "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.998403655941059, "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9992653633755828, "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.27.block_sparse_moe": [ { "accuracy": 0.9492465990938639, "total_bits": 3163693568, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9509995444432685, "total_bits": 3273794048, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9550728887123496, "total_bits": 3658178560, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9561033742012162, "total_bits": 4103823360, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9751167668911972, "total_bits": 4627178240, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9771400064622101, "total_bits": 4742979584, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9784126572899128, "total_bits": 5099635456, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9871372061928636, "total_bits": 5829931776, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9882405594873586, "total_bits": 5915811840, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.987433654452233, "total_bits": 6012347136, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9889967789334294, "total_bits": 6128148480, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9936213781076827, "total_bits": 7397516032, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9945249655283988, "total_bits": 7513317376, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.99651325234261, "total_bits": 8556192512, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9967528861016035, "total_bits": 8883079168, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.996977861768468, "total_bits": 9679996928, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.999064978402059, "total_bits": 11324164096, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.28.self_attn": [ { "accuracy": 0.9664694231591726, "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9685291643989713, "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.972597909895213, "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9777398309425304, "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9839193185693339, "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9843298701667472, "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9885614245492769, "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9893210045012989, "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9903858334414268, "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9907775346287772, "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9919375964244338, "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9927346145391072, "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9931390312451281, "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9938808282788255, "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9963227827469573, "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9969365868202754, "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9971841983129516, "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9981091103308197, "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9991795669224015, "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.28.block_sparse_moe": [ { "accuracy": 0.9431047380754822, "total_bits": 3163693568, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.945149510510658, "total_bits": 3273794048, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9497160123367059, "total_bits": 3658178560, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9508985177074608, "total_bits": 4103823360, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9721432042945373, "total_bits": 4627178240, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9744144857517982, "total_bits": 4742979584, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9758776010161168, "total_bits": 5099635456, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9854697322865066, "total_bits": 5829931776, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9867065598894107, "total_bits": 5915811840, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9859033735272916, "total_bits": 6012347136, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9876551953095355, "total_bits": 6128148480, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9928330119062019, "total_bits": 7397516032, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9938503470222809, "total_bits": 7513317376, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9960461396748495, "total_bits": 8556192512, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9963618201130119, "total_bits": 8883079168, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9966302002706614, "total_bits": 9679996928, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9989361021097897, "total_bits": 11324164096, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.29.self_attn": [ { "accuracy": 0.957096274353956, "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9597425992159467, "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.965274363462078, "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9724127174796242, "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.976889965565581, "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9779241176224068, "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9833636776004967, "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9845743882714918, "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9873583810894113, "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9878187209465786, "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9892489515048893, "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9897997129561478, "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9906914836580032, "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9913838532704272, "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9948310355753883, "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9958245905581862, "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9959767667674705, "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9974845883277196, "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9989096333554602, "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.29.block_sparse_moe": [ { "accuracy": 0.941074979148413, "total_bits": 3163693568, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9433593895090253, "total_bits": 3273794048, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9481498518664586, "total_bits": 3658178560, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.949387186844098, "total_bits": 4103823360, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9712408831048953, "total_bits": 4627178240, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9736053838737702, "total_bits": 4742979584, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9751923074455637, "total_bits": 5099635456, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9849093315240584, "total_bits": 5829931776, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9861578816842091, "total_bits": 5915811840, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9854489150702169, "total_bits": 6012347136, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9872536562187108, "total_bits": 6128148480, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9926036551937854, "total_bits": 7397516032, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9936531708624802, "total_bits": 7513317376, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.995876879161714, "total_bits": 8556192512, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9962441812784067, "total_bits": 8883079168, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9965426301044461, "total_bits": 9679996928, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9988902398985565, "total_bits": 11324164096, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.30.self_attn": [ { "accuracy": 0.957538928934618, "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9593795633041545, "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9665144957405956, "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9739829545938655, "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9800169611545769, "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9806120824068785, "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9864381359596002, "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9871032729156708, "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9883383535534928, "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9888693482958173, "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9901027261573625, "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9907827659902212, "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9916152410013112, "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9923141048485903, "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9952406034510779, "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9960809812734002, "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9963926362412933, "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9976764147133125, "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9989737635570284, "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.30.block_sparse_moe": [ { "accuracy": 0.9415707801909823, "total_bits": 3163693568, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9436571307872471, "total_bits": 3273794048, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9482753229768652, "total_bits": 3658178560, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9495013607175726, "total_bits": 4103823360, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9712551621426093, "total_bits": 4627178240, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9736744547752958, "total_bits": 4742979584, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9752490251374087, "total_bits": 5099635456, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9847809878717128, "total_bits": 5829931776, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9860571502266746, "total_bits": 5915811840, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9853997763344332, "total_bits": 6012347136, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9871931133399668, "total_bits": 6128148480, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9925565863714406, "total_bits": 7397516032, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9935485242964014, "total_bits": 7513317376, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9958247821410432, "total_bits": 8556192512, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9962255183337746, "total_bits": 8883079168, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9965470873162543, "total_bits": 9679996928, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9988499127855328, "total_bits": 11324164096, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.31.self_attn": [ { "accuracy": 0.9586047814472726, "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9604728123859355, "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9660988255943123, "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9718520996209822, "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9806693496770764, "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9814251518171084, "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9865540845417663, "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9876932341997561, "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9888443540487635, "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 64 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9887367435485909, "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9898228748505445, "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9908927056546274, "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.991149512609761, "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 64 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 64, "4": 64 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9922918547435027, "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "5": 32 }, "bits": [ 5 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9954177318385949, "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9960410601434935, "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "accuracy": 0.9964582211288967, "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9974041306046083, "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9989293693797663, "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "k_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "v_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "o_proj": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.layers.31.block_sparse_moe": [ { "accuracy": 0.9520071748840182, "total_bits": 3163693568, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9536487406217738, "total_bits": 3273794048, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "6": 32, "3": 64, "2": 64 }, "bits": [ 6, 3, 2 ], "bits_prop": [ 0.05, 0.2, 0.75 ], "scale_bits": 4 } }, { "accuracy": 0.9568513805340779, "total_bits": 3658178560, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "3": 32 }, "bits": [ 5, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9577835406127729, "total_bits": 4103823360, "w1": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "3": 64, "2": 64 }, "bits": [ 3, 2 ], "bits_prop": [ 0.3, 0.7 ], "scale_bits": 4 }, "w2": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9767458840812507, "total_bits": 4627178240, "w1": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 128, "3": 128 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128, "3": 128 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.978604474006907, "total_bits": 4742979584, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32, "3": 32 }, "bits": [ 8, 4, 3 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9797589670082456, "total_bits": 5099635456, "w1": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9876951015995521, "total_bits": 5829931776, "w1": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 128 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9888119341462458, "total_bits": 5915811840, "w1": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "4": 32 }, "bits": [ 8, 4 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9881861806651088, "total_bits": 6012347136, "w1": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 128, "4": 128 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 128, "4": 128 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9896288022496983, "total_bits": 6128148480, "w1": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "5": 32, "4": 32 }, "bits": [ 8, 5, 4 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9939493448228428, "total_bits": 7397516032, "w1": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128, "5": 128 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128, "5": 128 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9947971954059444, "total_bits": 7513317376, "w1": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.1, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9966090647993904, "total_bits": 8556192512, "w1": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 32, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "accuracy": 0.9969089955434596, "total_bits": 8883079168, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.15, 0.85 ], "scale_bits": 4 } }, { "accuracy": 0.9971677733487204, "total_bits": 9679996928, "w1": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } }, { "accuracy": 0.9989987402958306, "total_bits": 11324164096, "w1": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w3": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 }, "w2": { "group_size": { "8": 128 }, "bits": [ 8 ], "bits_prop": [ 1 ], "scale_bits": 4 } } ], "model.norm.norm": null, "lm_head.linear": null }, "last_module_idx": 66 }