diff --git "a/measurement.json" "b/measurement.json" deleted file mode 100644--- "a/measurement.json" +++ /dev/null @@ -1,269245 +0,0 @@ -{ - "measurement": { - "model.layers.0.self_attn": [ - { - "accuracy": 0.9882204495370388, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9905345581569954, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9908609070668095, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9951194582482505, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9953055936769631, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9953307655247811, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9957959133347398, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9958187708949768, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9960847310844416, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9963650899130458, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9975374040046805, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9976727519748047, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9976814354733122, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9978530936641619, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.998687037168757, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9988401763073756, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9988217276036985, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.998995693513854, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9994910780760141, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.0.mlp": [ - { - "accuracy": 0.9788573293113395, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9796058765465492, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9828396273384753, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9845958165089158, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9920969956652507, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9927615471380321, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9937919959621994, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9947610453543204, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9955766327911988, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.996013992092278, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9965602005770626, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9977517776209616, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9980571145302077, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9982920531826829, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9983519321387192, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9985558232690795, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9985996442540598, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.1.self_attn": [ - { - "accuracy": 0.9675649551576689, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9719655513763428, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9735818838602618, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.985838206043761, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9862000204524711, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9862708680233673, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9894230682647934, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9894961026056033, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9901357116912933, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9907121548036996, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.992949159791399, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9933003920473551, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9933611558211085, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9938339473140475, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9963313384888399, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.996854762693769, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9970984498136922, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9977882366970574, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9991455900384799, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.1.mlp": [ - { - "accuracy": 0.9648792126068944, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9655497620176328, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9755124483551634, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9787204087662854, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9846229722331229, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9856087336512772, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9897793803707158, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9918972681717653, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9927501104232904, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9923465704594395, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9932018061609644, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.996051975648458, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9965722221685084, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9978054041337026, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9979722235890988, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9988558395814738, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9992868421311294, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.2.self_attn": [ - { - "accuracy": 0.9549655531974215, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9577184405766035, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9608470926943579, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9771939350390121, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9772216180633557, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9791582047351097, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9831632723737704, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9855896865360831, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9864548423670625, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.98695960386019, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9897885546088219, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.990389150175217, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9906038055686575, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9911570964430115, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9942948530232044, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9954187083548229, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9956502422665883, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9975365417960443, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9987578407836784, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.2.mlp": [ - { - "accuracy": 0.9697893717767376, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9715376135059878, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9735761159344724, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9744186364604455, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9871859428668884, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9872301918650536, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9886156304151212, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9905480359509391, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9920300567081493, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9930814266033274, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9927231063943749, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9954763027520752, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9936762717736297, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9959649410294859, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9967340963806859, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9968845676416286, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9969812507134568, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.3.self_attn": [ - { - "accuracy": 0.9773997809541853, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9777744115379295, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9802319235168397, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9835550261563376, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9861178097168082, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9858822050553403, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9871265041690908, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9869086538735581, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9898698298799756, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9909247991775996, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9929535798483381, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9945063592473927, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9932424787824091, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9949948350457769, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9968262600994325, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9973462605665094, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9970963730317491, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9987268478021418, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9989209306873627, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.3.mlp": [ - { - "accuracy": 0.9748736177816203, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9754252745034663, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9813763566168123, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.983065303513094, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9883064925915709, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9891056696345147, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9915498643500829, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9941495079792252, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9947129293639017, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.994274784406451, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9949401401743097, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9970423308335931, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9974361122524562, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9984229620343907, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9984881023900878, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9989534998496407, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9995677288733484, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.4.self_attn": [ - { - "accuracy": 0.9775245683757883, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9783632877821985, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9776986822681991, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9806633030407523, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9813189817053315, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9855792696195605, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9824379402653951, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9870848113082742, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9897656714739768, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.99000161354381, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9935343193846118, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.995052006555182, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9951640722819751, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9953808837055572, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9944690017176694, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9975515543942732, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9946357574877575, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9987346428495488, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9986156929639707, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.4.mlp": [ - { - "accuracy": 0.9684666705955016, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9692107247384755, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9758444575985011, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9777806028233547, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9852369774476086, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9862247308912245, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.989018369196473, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9926463170957408, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9933333855587989, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9928076174232716, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9936384047308054, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9962723621130479, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9967651326013239, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9980077106707828, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9980947113424343, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9986221868985969, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9994589826099783, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.5.self_attn": [ - { - "accuracy": 0.9695936593374139, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9705891503805393, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9731280522322968, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9768947550634804, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9808862505872783, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.980675170100049, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9827321639627611, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9823708875153802, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9835245750207258, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9878098866657207, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9903721468719212, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9923026418975113, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.990781932909924, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9932272144911909, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.995879019720872, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.996897589721668, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.996241834259739, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9982059006027779, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9987016228199201, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.5.mlp": [ - { - "accuracy": 0.9636329662446913, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9645036773658112, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9717452027099697, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9739062288579973, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9829344999682355, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9840553895276236, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9871669598659011, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9915095163803351, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9922844378253151, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9917093891846506, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9926543886479187, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9956951675397393, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9962572247941831, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9976934935701521, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9978005933011637, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9983838003367724, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9993729266310478, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.6.self_attn": [ - { - "accuracy": 0.9612692887649724, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9628034587949514, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9667577293554419, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.974040487830184, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9782921209147102, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9816606941779977, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9816138662986065, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9856859499000405, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.987114743648195, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9883668328341293, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9907659800772212, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9922957990048943, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9926673089840302, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9931916442797765, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.995182077032759, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9964916162058025, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9958427349705935, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9980065481872052, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.998590661393552, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.6.mlp": [ - { - "accuracy": 0.9562961305246541, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9572838178198588, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.966001278867847, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9685873390224419, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9794017125999457, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.980761945492735, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9844822488284033, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9897524534822687, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9906880186478558, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9899864473723268, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9911289272192669, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9947955968520162, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9954767533651504, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9972120044556888, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9973420008195957, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9980428308603893, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.999242253434587, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.7.self_attn": [ - { - "accuracy": 0.9465726462045783, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9480165510781502, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9513992072132073, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9560651735059524, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9725402221083641, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9721119005331084, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.976052454280618, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9755358034254689, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.981606609629173, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9848976550427707, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9853805466309974, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9892821685460053, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9859692887893241, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9913836903870106, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9937486184001165, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9956841637919608, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9943358843753997, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9977207956835628, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9981769265723415, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.7.mlp": [ - { - "accuracy": 0.9490519561069577, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9501709183187861, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9603118763158196, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9632658540810409, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9758172733219046, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9774427958519051, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9816979109554699, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9879920789971948, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9890790314315573, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9882270712650528, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9895734047566197, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9938631905242801, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9946672856489098, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9967163356364166, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9968656286609506, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9976750305106276, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9991093663985865, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.8.self_attn": [ - { - "accuracy": 0.9349488094449043, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9374840585024733, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9425851940912635, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.949600921257546, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9694062721376356, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9692638941893452, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.974350526193647, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9741616337805202, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9742078338014452, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9738768945987287, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9760646881829751, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9756764405357995, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9762780994765068, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9765851407949078, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9895400945587378, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9894597004167736, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9901393675245345, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9941598093749857, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9966629587395704, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.8.mlp": [ - { - "accuracy": 0.9629054266567293, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9647370508234752, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9682142667864498, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9692535847425461, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9833535054747603, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9797997134982755, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9808957519401845, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9880126019155508, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9818137410145841, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9906013702091417, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9899346071148389, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9951150726992637, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9947159383200893, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9961527823347991, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9981142452697417, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9984738746363866, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9990120776530699, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.9.self_attn": [ - { - "accuracy": 0.9824795060496974, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9834199278909517, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9877707456287584, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9897850279598251, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.992174714570865, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9922820045367667, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9948615330197897, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9949204068949544, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9949247708151999, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9954484432298494, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9958472031499505, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9962069575956679, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9966186391447034, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9971987023324657, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9981225722493898, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9984768687618749, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9983740690942412, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9992034709476253, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9993874440107519, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.9.mlp": [ - { - "accuracy": 0.9705127511957758, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9711937813676501, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9762913742053666, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9777256831232655, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9858587208743158, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9868558413398109, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9889210957151494, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9930258581022683, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9936368123897793, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9931242576996354, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9939300411037708, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9964031588629281, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9968731702967105, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9980636101099662, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.998146507696028, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9985378281103054, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9994622226360278, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.10.self_attn": [ - { - "accuracy": 0.9864729203430838, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9869435959936756, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9886149190317252, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9910360327676723, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9927878838988315, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9927770932523632, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9942982605486912, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9942427410033384, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9947872336978387, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9953893248457462, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9962648889233702, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9965980228857676, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9965978854666709, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9969727539114261, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9978305928433608, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9982330091634618, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9980437424413762, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9992586171674224, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9993688381724305, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.10.mlp": [ - { - "accuracy": 0.9670894271449039, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9678923083763373, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9733611385485059, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9748675119327871, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9842287694643203, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9853361084272987, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9875040943734348, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9922437091407023, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9929047483142073, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9923407342530003, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.993218053401889, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9959851361284229, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9965114832463625, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9978183274414685, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9979208813563577, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9983299852771635, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9993613586503718, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.11.self_attn": [ - { - "accuracy": 0.9841853496373484, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9848977069762584, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9866442458802148, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.989957207862876, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9915584319663283, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9916609183542038, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9936562494105218, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9937502216841829, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9944848104159495, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9945946315327954, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9957828272477185, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9960234793344218, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9960864995557227, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9964123678050543, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9976507452696464, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.998073629857237, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9979943736995521, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9991404783259464, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9994402442764687, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.11.mlp": [ - { - "accuracy": 0.9653792869495719, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9662312446652275, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9718316820401111, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9734083249380714, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9833819396107605, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9845326148267639, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9867946740337893, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9918407498809853, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9925300216449326, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9919674430336607, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9928734018957537, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9957822327820682, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9963289968988025, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9977335362131462, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9978327470292386, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9982618158530002, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9993973692701394, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.12.self_attn": [ - { - "accuracy": 0.9809227428938213, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9818319316071115, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9839795706548581, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.988320359837656, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.990731971975612, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9908016119152308, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9936052376012269, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9936856016957838, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9944361469855434, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.994811580920788, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9954498842799742, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9957158732791677, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9959101326224443, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.996257789212426, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9975262938532978, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9980205749406627, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9979699839911375, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9990090921775462, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9994567477260716, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.12.mlp": [ - { - "accuracy": 0.9608404901074736, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9619475241358343, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.968720872592377, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9705056721638692, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9817198754631375, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9830821059938324, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9856341550077655, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9909440865495095, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9917380861251762, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9911152094504551, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9921450301220542, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9952760638767167, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9958571750352061, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9973209198613308, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9974456275139298, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9979140613669235, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9988501633817674, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.13.self_attn": [ - { - "accuracy": 0.9736051584937071, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9748023099903214, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9773332505535922, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9838855053837362, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9871030139344695, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9872726398569188, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9915206839918698, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9917275623516425, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9922264227398524, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9917356792935416, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9936152534853471, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9939479430353171, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.994256846790545, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9946505308249279, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9967165808254657, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9972693689816975, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9975173671938184, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9985453651262153, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9993327038169618, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.13.mlp": [ - { - "accuracy": 0.9604540489810078, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9614184061555486, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9682551911590915, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9701809340009564, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9810908896554458, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9823323927427593, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9851020740982341, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9906899045760694, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9914534421833722, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9908338301864109, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9918441949657312, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9951915733981878, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9958007758066646, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9974171509476084, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9975250780668208, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.998033794598948, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9992826005970863, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.14.self_attn": [ - { - "accuracy": 0.9723096826652947, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9731413587241581, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9760942838007682, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9820764129491228, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9866166731685793, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9866270714586503, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9910210760212258, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9910132310255185, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9920738826530349, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9929192795810339, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9932417026108229, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9937086181813165, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9937793118634114, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9946692780174903, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.996453154041726, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9971555996330848, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.99722475114303, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9985364865095011, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9992379903820907, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.14.mlp": [ - { - "accuracy": 0.9570539200580434, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.958134865878444, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9651964487213838, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9671983848276892, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9794472279027104, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9807890283532048, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9836489116086772, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9898943576814705, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9907129688041383, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9900589258606104, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9911515758323827, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9947862577207974, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9954436612928188, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.997199477505331, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9973237090682807, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9978497518888233, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9992544794344882, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.15.self_attn": [ - { - "accuracy": 0.9733219906001499, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9742616338067149, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9769677392844307, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9828677778867515, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9866369075239881, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9866732406478963, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9912626813901099, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9912183459505046, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9923342516106602, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9930039875601467, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9935099222623792, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9939333491860644, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.994236258189439, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.994803033180927, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9965457576817196, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9971805509841559, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9973381760554683, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9985424883014179, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9992463467152495, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.15.mlp": [ - { - "accuracy": 0.9532555930904651, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.954468931708681, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9620204120874405, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9641407286062053, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9776476283410662, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9791234724811817, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9821462660145602, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9890403767165384, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9899273838749841, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9892069691988198, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9904082612575669, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9943444041878378, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9950679055243534, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.996968070523029, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.997102826784708, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9976432219907445, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9991955549661438, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.16.self_attn": [ - { - "accuracy": 0.9722356125712395, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9732577826239561, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9769843129049006, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9828056990119972, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9864845129808313, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9866309622302651, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9909224152368935, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9911058515702423, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.991850024334302, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9923147417506889, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9930639942911895, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9934817019751981, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9936424959328418, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9943977453084173, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9964481470248613, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.997087573687742, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9971999532273529, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.998487363796142, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9991494452375551, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.16.mlp": [ - { - "accuracy": 0.9517271829474914, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9529834822016328, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9606666192412376, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9628361834115103, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9768954031169415, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9784106156347614, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9814808571613148, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9886280656547138, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9895338837155386, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9888238735055845, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9900480544586715, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.994126516087961, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9948691413615292, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9968367236127195, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9969830299654093, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9975521419025761, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9991583220360457, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.17.self_attn": [ - { - "accuracy": 0.9861707418764892, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9865728014540908, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9883402958022136, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9908840801615856, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9927610737443167, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.992779151573287, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9952388153730058, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9951764270908346, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9954970708901161, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9964758312317396, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.996486721560359, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9967496096475148, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9971519922960157, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9974272454610879, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9983299265180616, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9985810908225463, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9986913191846964, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9993282435860141, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9995894279402963, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.17.mlp": [ - { - "accuracy": 0.9690574249742847, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9698944905478704, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9748335511080528, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9761615807288572, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9851480897908148, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.986175183525407, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9880843365466908, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9926866869393148, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9932775972778664, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.992778980393747, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9935962456940232, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9962128377151921, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9967012514703368, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.997957951334109, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9980406912815708, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9984048975564196, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9993978408548221, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.18.self_attn": [ - { - "accuracy": 0.9852060933450335, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9856969456090346, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9877626834633318, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9908795001867571, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.992112539390004, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9920635373380623, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9942708956445322, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9941813811341202, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.994875914552004, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9953988226081588, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9958912366020837, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9962406441642854, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9962281815854734, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9967142972960382, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9978800447118518, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9982138517079875, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9982289484070958, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9991631350590309, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9994978272711466, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.18.mlp": [ - { - "accuracy": 0.9679507509266075, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9687712898379878, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9737539008553875, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9751357977327547, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9846126060246637, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9856350398377368, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9876307456901199, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9924119787143642, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9930193218843717, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.992516254295448, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9933554033234128, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9960730306093434, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9965748960323828, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9978759559177744, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.99798579873076, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9983620531274937, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9994270656607114, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.19.self_attn": [ - { - "accuracy": 0.9843009946573722, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9849340982588106, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9876898622973577, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9909960308455323, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9921372072820208, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9922685598660457, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9943951468081459, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9945444293614281, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9952548786041964, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9956908837132352, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9962514023615145, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9964220559310266, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9966573578207508, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9969622126510856, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9980643098732751, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9983549344929328, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9984088557999042, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9991987058004413, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9995065833255036, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.19.mlp": [ - { - "accuracy": 0.9659446463184922, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9668195911340023, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9724387565725728, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.974006346671989, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9835378354121196, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9847497135204705, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9869470548521924, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9917744567470723, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9925101894493166, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9919215284631049, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9928558143591019, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9957564929132595, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9963176649701047, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9976834372715339, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9977852654165441, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9982278401791853, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9992624424523552, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.20.self_attn": [ - { - "accuracy": 0.9789336985467296, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9796173548031795, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.982154910305613, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9862954458466878, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9897002969427329, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9898976220406199, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9927986803293032, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9930779312761795, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9934774209058991, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9934667927139488, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9948133505684765, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9951649437854556, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.995391488810511, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9957995398680827, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9972931251824392, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9978313502782074, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9978120149186763, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9989231554117348, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.999413372912003, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.20.mlp": [ - { - "accuracy": 0.9653460975540311, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9662008136510849, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.971804317184969, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.973366830743065, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9833473358793479, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9844538707188085, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9867199826005258, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9917470402280358, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.992426351006878, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9918677285558691, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.992772254676215, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.995743345702067, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9962875263789963, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9977166845193622, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9978096779913789, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9982307505070869, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9993589225219024, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.21.self_attn": [ - { - "accuracy": 0.9782385981494659, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.978893671576914, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9818941646402604, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9859361477980488, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9894642201252282, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9894993338782928, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.992651914834584, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9926543875695452, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9932784819607868, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9935200471112406, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9942725697581313, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9947749357057833, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9947491379846868, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9955873196135814, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9970668423063073, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9976991496637071, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9975780639123466, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9988485679189724, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.999302346165917, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.21.mlp": [ - { - "accuracy": 0.962106925876517, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9630539775090783, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9690157783854949, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9707043213083556, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9817663001288709, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9829783799911016, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9854112260631824, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9909534323225287, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9917008573385445, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9910960737860909, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9920880394056439, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9953424882290787, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9959387247401633, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.997500507498013, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9976122382979252, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9980684780013306, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9993210921912625, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.22.self_attn": [ - { - "accuracy": 0.9780922332582506, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.978869912753764, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9814116187197598, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9858588437108617, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9888246001332606, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9888638533455761, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9922706500234965, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9923215211908284, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9935549961795148, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9939102595304384, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9945722589342806, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9947101152869627, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9952703933966788, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9953047211870159, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9972201890593982, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9976306705564064, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9978561351553684, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9987492840645198, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9992992321112022, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.22.mlp": [ - { - "accuracy": 0.9588064415086257, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9598433502802723, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9662450301019769, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9680547101521179, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9801951310058173, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9815084705815503, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9841221991534296, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9902003852797574, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9909982504813295, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9903476410545409, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9914218722352464, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9949536604414645, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9955990351614004, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9972932328972476, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9974132889734679, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.997899418886702, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9992698042203157, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.23.self_attn": [ - { - "accuracy": 0.9774867482973557, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9782635502909359, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9814763525010723, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9858601245244867, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9889494628939581, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.989007454120407, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9924209862947464, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9924669015515399, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.993051266493766, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9934780092199186, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9941525627785411, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9946874537946362, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9947554781101644, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9953885440543098, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9969951946067771, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9975553913176746, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9975571473508975, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9987948906920409, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9993094773319746, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.23.mlp": [ - { - "accuracy": 0.9571248330175877, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9582005188261208, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9648972476195348, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9667915358747307, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9793798040206495, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9807598646240014, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9834720791856709, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9897522473658779, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9905935901609298, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9899336614793068, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9910487455168837, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9947275200393051, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9954021459837493, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.997161835952858, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9972966469971365, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9978136691497639, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9992292637532381, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.24.self_attn": [ - { - "accuracy": 0.9738485396496559, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9747441276221683, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9777574044113096, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9829785705691105, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9870791036733672, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9873112645980558, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.991570640526908, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9918808929965293, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9922428871504962, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9927824085676357, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.99344186350017, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9939368302002549, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.994233038680824, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9948514455463737, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9965893712482954, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9972931650148606, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9973361667944118, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9985934550733011, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9993033244115251, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.24.mlp": [ - { - "accuracy": 0.9558304476698762, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9569742314909634, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9640807240809265, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9659834963906753, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.978873141943232, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.980291172518934, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9830257989172089, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9895723066047618, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9904065348796154, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9897049019407285, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.990845029658981, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9946065263111928, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9952965796018314, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9971059564268216, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9972300085018536, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9977443898831936, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.999220208462487, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.25.self_attn": [ - { - "accuracy": 0.9729595246087563, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9738398251172743, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9768751105783802, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9817434998817349, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9872498605155239, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9873047516632237, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9920892463751921, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9920956094161069, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9927954156637976, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9936205648547528, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9934925677892017, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9939842807790754, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9943467988387534, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9950215382820093, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9967681560236471, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9973476176566788, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9975718791829422, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.998532954698404, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9992651618958917, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.25.mlp": [ - { - "accuracy": 0.9545503731228804, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.955748432462937, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9628542570495292, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9647542857810071, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9782595691320143, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9797235937593015, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.982443387816219, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9892884576693177, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.990132335132282, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9894171573564803, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9905862109490523, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9944502127381336, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9951623721219796, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9970057796760413, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9971433587703168, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9976485737685212, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9991608786359919, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.26.self_attn": [ - { - "accuracy": 0.9733909914564145, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9745068139347591, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9769678403084215, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9821878784875336, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9860363867329923, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.986547663611801, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9902881278019202, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9909210366343981, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9909463781667384, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9918404215652692, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9930184101930967, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9932786182157303, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9936587534673316, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9939400048338269, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9962816307639801, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9969254212551996, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9969868225070011, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9984332764809216, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9991316572597594, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.26.mlp": [ - { - "accuracy": 0.9536722964166027, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9548553596005628, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9619902782142162, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9639054275068798, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9777411777330073, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9792778193950653, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9820300241148001, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9890402292979783, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9899328148580695, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9891402346180066, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9903789758192081, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9943115390763667, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9950560285636273, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9969522146887979, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9970772827387249, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9975837871897966, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9991704691569075, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.27.self_attn": [ - { - "accuracy": 0.9756316791631674, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9764137372962738, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9795862760786948, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9836311326117108, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9876224265776967, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.987787910124385, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9910188992507756, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9912735915399695, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9917128527487972, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9919372349487323, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9930442216021842, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.994131910749466, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.993419416984053, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9946564663520181, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9964691252496681, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9972425610145652, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9969960753193223, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9987231166451238, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9990994735455484, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.27.mlp": [ - { - "accuracy": 0.9532267248356029, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9543935216375088, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9614411525820431, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9633558608199421, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9774605466641093, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9789900237106179, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9817423981271292, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9889018747367357, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9897859309015697, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9890153630097446, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9902401904967663, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9942469612303141, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9949826362763384, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9969187534879893, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9970466135755965, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9975550657407822, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9991729445939296, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.28.self_attn": [ - { - "accuracy": 0.9736109329877716, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9744112203387838, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.977189416408931, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9824266975353423, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9864402529048292, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9866243562612095, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9905266289746291, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9907310144289544, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9924879975028729, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9934782465723785, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9932076331277034, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9941828977806788, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9939652102705288, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9950893834527386, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9967897168826312, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9973221275109896, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9975812238408253, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9985351975222951, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9990018626252238, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.28.mlp": [ - { - "accuracy": 0.9525744284650213, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9537497147133476, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9608402443363478, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9627943174227288, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9771050146143687, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9786570815761623, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.981469673545737, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9887553708578803, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9896659317524418, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9888765994018238, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9901271421756399, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9941768119185183, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9949266155568981, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.996881374452067, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9970128862003452, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9975224598310888, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9991645446034932, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.29.self_attn": [ - { - "accuracy": 0.9746558973565698, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9781670631644758, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9812162214105851, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.985899878371703, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9890875123245152, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9891290752658326, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.992920291953181, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9929612820085726, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9934377180853564, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9937369674321657, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9942727519664913, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9945899847600805, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9948429521785951, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9952460868823293, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9970607041491588, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9974361287956861, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9976401122740323, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9987675788702051, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9993612719531244, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.29.mlp": [ - { - "accuracy": 0.9517628647583096, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.952947413940963, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9601163726888204, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9621070281259323, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9766926598294001, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9782585986938915, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9811352262959668, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9885598018667415, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9894622691269768, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9886680656651917, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9899276561397863, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9940605976765877, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.994818032976534, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9968197857225803, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9969509066278605, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9974699975910449, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9991421802765935, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.30.self_attn": [ - { - "accuracy": 0.9754208168014884, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9763047770646057, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.979358375562649, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9843067432704725, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9880074547652743, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9882528666946057, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9921787435581025, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9924407610424647, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9928621048303811, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.993227951351161, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9939536288331606, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.994370491954645, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9945889245106005, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9950688837316671, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9969237022297947, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9974843872662046, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9976167868896338, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9987252978577377, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.999351345004183, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.30.mlp": [ - { - "accuracy": 0.9513624643808917, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9525544589483425, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9597670226112792, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9617861979886105, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9765405196108317, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9780874011645976, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9809873012433711, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9884582313856012, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9893714052281881, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9886090847614565, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9898572772937385, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9940361941331312, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.994783941448029, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9968061691248103, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9969434247254149, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9974753149098864, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9991459085079106, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.31.self_attn": [ - { - "accuracy": 0.9822352961882165, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9828565121677361, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9852549997893604, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9882352127399492, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9911184155284182, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9913561799759535, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9937328854453211, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9940438085774842, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9943047504455439, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9946461524079112, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9954218455884409, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9957255396599832, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9959568324327273, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.996349972501201, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9975745828920289, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9981044095965397, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9979645179486588, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9990538424922546, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9994339877999951, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.31.mlp": [ - { - "accuracy": 0.9630144494340608, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9639698155224323, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9697613037730518, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9713336770471773, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9823074184456154, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9834966490927496, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9857394332556348, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.991285512689501, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9919903437656007, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9914061699895874, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9923710359113389, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9954828007734919, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9960643251328484, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9975648572741959, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9976709926268086, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9980934187647348, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9993280945789976, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.32.self_attn": [ - { - "accuracy": 0.9790510156829106, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9796307099688994, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9821652513683626, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.985494228030898, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9901167783445042, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9901706227101386, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9934925372516247, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9935464574587777, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9945658631745333, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.995023774387511, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9951055851201281, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.995409948628788, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9956343310860623, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9961852341106063, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9974272163633845, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9979646277221802, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9979559349227011, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.998911897792775, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9993998085311001, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.32.mlp": [ - { - "accuracy": 0.9610166361457423, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9620409255945369, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9680364842673665, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9696653069634187, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9813824990568193, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9826489092880174, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9849216505689057, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9908184035877255, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.991577413374264, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9909811651971387, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.991996785307205, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9952624373652629, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9958734596638303, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9974344075338817, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9975592653316102, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9979798469300333, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9992654662407738, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.33.self_attn": [ - { - "accuracy": 0.9787205798845542, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9794902403985983, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9814950175289261, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9852516830065533, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9884888207981068, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9885762788747486, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9915552357897947, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.991734795002757, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.992807085209183, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9931715404810874, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9939397842696819, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9946641120692029, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9943555033594174, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9953113669210947, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9969192963632706, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9974449117618956, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9974236938256869, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9988004862128659, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9992272294276565, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.33.mlp": [ - { - "accuracy": 0.9600882740005067, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9611030882714611, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.967115401633476, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9687597652603137, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9808443939607394, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.982148331099827, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9845027742221167, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.990594307867516, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9913519691970003, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9907009757712091, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9917501238615889, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9951172334043995, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9957452439825589, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9973724808874785, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9974840161402857, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9979204379732868, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9992727963621483, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.34.self_attn": [ - { - "accuracy": 0.9791891932683555, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9797003533397066, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9827055727670851, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9859564048483184, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9899890494385832, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9901864423759674, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9929981044444599, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9932310404501071, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.993493477985459, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9939784984668031, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.99374700919725, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9951219054453663, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9940390423381407, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9956641240094445, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9969575066168449, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9977141148594552, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9973687183994212, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9989171978331318, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.999136444738772, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.34.mlp": [ - { - "accuracy": 0.9592460586052192, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9602555288492065, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9663621407786482, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.968032831424161, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9803721976809596, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9817170475756651, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9841167008700339, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9903442096700402, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9911300276994313, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9904589289309162, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9915394799440707, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9949890375137329, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9956366072629431, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.997303224622125, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9974217459032508, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9978679743240049, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9992629450402761, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.35.self_attn": [ - { - "accuracy": 0.9777314161979839, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9784104121162703, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9808523450046778, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9851963832778367, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9879348366276214, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.987903659648605, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.991085924649317, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9910938463811028, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9935713139313617, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9941650984539209, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9940094218442315, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9947892531482992, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9946930172215951, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9956315364811177, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9968497433368803, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9974873653171878, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9974110632595655, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9986898895003833, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9991949468538607, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.35.mlp": [ - { - "accuracy": 0.9586515150179988, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9596694860803454, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9658243458129858, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9675320449628328, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9800485998490139, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9814022296647492, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.983861952529926, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9902109386840541, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9910107216503667, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9903333244453135, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9914264683611691, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9949299194359857, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9955860507846075, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.997279235964494, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9973955037268368, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9978376264304021, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9992571023329603, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.36.self_attn": [ - { - "accuracy": 0.9776381832106333, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9813781647305739, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9839319633810144, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9879774397827292, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9905696530140152, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9906034163983637, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9940683696615068, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9941243857120801, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9946100601166683, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9948081706380961, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9951146310440412, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9953168563936886, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.995697930161106, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9959064980503172, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9974819037842712, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9978454903834254, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.997987266513519, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9989851911515487, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.999434256757683, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.36.mlp": [ - { - "accuracy": 0.9576055638884243, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9586373787177236, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9649457797212035, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9667014503165295, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9795368189776414, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9809063085305848, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9834468364715576, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9899619626077382, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9907688921268442, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9900783855517051, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9911839658345439, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.994787181421232, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9954515421444452, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9971975190013549, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9973125433193912, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9977754347185653, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9992200363715376, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.37.self_attn": [ - { - "accuracy": 0.979711619058722, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9804393435877404, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.983189134123294, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9871259311606225, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9901654269350203, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9900285656760005, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.993561055756321, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9934362859563216, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.994072927051763, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9943498634292107, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9947865838920208, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9952602389170543, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9953610690212563, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.995939648097479, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9975008604999044, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9979187919709244, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9980562140944561, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9989631389770167, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9994263244213806, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.37.mlp": [ - { - "accuracy": 0.9570620137413866, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9581024539902022, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9644519565136809, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9662388289641393, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9792645409409153, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9806550090740386, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9832292007478444, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.989813087336523, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9906408477476553, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.989950792961999, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9910747800757619, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9947239581961185, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9953973116965866, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9971625119459963, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9972861611443621, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9977593134451461, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9992260467414255, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.38.self_attn": [ - { - "accuracy": 0.9775873071661121, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9783417663880085, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9809408395698196, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9856620742204157, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.988971699421343, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9892664281896463, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9930035092524792, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9934017446725384, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9940062987275029, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9947131394874305, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9945065072541567, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9950605529041863, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9953408380480189, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9958560609405762, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9973401425982287, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9978142718135036, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9980555273911083, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9988170645389657, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9993816453191501, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.38.mlp": [ - { - "accuracy": 0.9574652098511395, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9584942922780388, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9649066485856709, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.966704194777106, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9794606829160138, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9808213225516834, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9834302286862543, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.98990653705244, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9907229113343515, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9900666490491283, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9911576529082499, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9947858013849902, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9954437140350867, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9971948416205123, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9973168824738088, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9977901357495668, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9992123161914366, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.39.self_attn": [ - { - "accuracy": 0.9710564414333356, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9720825283346992, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9758245938114429, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9819479747430274, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.986015264631102, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9860168086658967, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9913672744786661, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9912761617206821, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9922719760797918, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9929132712024608, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9930191549080375, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.993504674394468, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9939420354846669, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9945228280695645, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9965853629094598, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9971766297301081, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9975068512691283, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9985049929175722, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9993195046875993, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.39.mlp": [ - { - "accuracy": 0.9578263984110794, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9588534493783587, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9655756333744839, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.967396610465489, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9797582621931246, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.981072667054832, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9837304817414597, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9900553604391845, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9908342221506724, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9901815008028949, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.991248109647514, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9948440280889994, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9954887098804313, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9972328614186201, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9973475150396361, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9978387096994802, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9992457044825546, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.40.self_attn": [ - { - "accuracy": 0.9717569399232927, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9726884579776149, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9754746611181059, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.980784489528129, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.985973898615492, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9860863864519879, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9909107467698816, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9909848171335301, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9908135529411467, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.991887724296631, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.992472322290077, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9930481878599446, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9932687396281644, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.994045512959067, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9960841889076523, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9969169315146772, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9968705216765796, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9984750681691558, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9992199691508798, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.40.mlp": [ - { - "accuracy": 0.9564970894471595, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9575570967832678, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9641731716692448, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9660084270136922, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.979034281482822, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9804015604190921, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9830492590308973, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.989720180336582, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9905194909017729, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9898408374406005, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9909420238415662, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9946487781861307, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9953114996958328, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9970986786004352, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9972137183649465, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9976848900550976, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9990766172168629, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.41.self_attn": [ - { - "accuracy": 0.9770101121577778, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9776488449424505, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9820634948187753, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9863719968125224, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9892047781154121, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9890495231375098, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9928938490093538, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9926164571293875, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9938887580984125, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9942718080097908, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9938991811665657, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9945403891002857, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.994544442743063, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9953633773532745, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9971146524614213, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9977299039141814, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9976448478900191, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9988813536811473, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9992916672168863, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.41.mlp": [ - { - "accuracy": 0.9563150062764946, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9573670481576732, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9639402643630379, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9657975951895902, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9788860730630787, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9802855574770978, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9829560828052069, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9896457331794265, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.990479273280423, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9897895093860203, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9909194150302363, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9946508416415829, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9953261564828848, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9971316786188829, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9972528553457538, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.997728544661138, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9992144843502166, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.42.self_attn": [ - { - "accuracy": 0.9712654762950382, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9722954513211, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9743992296586695, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9790059728547931, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9868723142303919, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9870088369536557, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9913842814337266, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9915070816335317, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9917566013816548, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9923080632365063, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9931133666418885, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9932062293794987, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9936682838260343, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9941253336881729, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.996488824964648, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9970280338250297, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9972243260371646, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.998549039781976, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9992064052555514, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.42.mlp": [ - { - "accuracy": 0.9558099746508033, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9568892305618838, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9635181530917946, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9653836441667456, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9786506432941869, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9800473455045569, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9827304568729902, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9894920279467968, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9903151583367664, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9896353076849329, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9907559361553898, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9945327832829207, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9952000582370123, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9970109531974518, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9971551778358653, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9976285782238272, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.99903243092673, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.43.self_attn": [ - { - "accuracy": 0.9715870522746914, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9726384684051338, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9760120682614414, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9814316347045334, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9866372440010309, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9866012104443813, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.991490655382605, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9913822829380239, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9923167168898018, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9923488248727823, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9932924356115492, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9935645317462715, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9940087567818793, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9944099249367259, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9965813810030293, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9971256508786035, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9974009275534436, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.998521328294086, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9992440855161189, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.43.mlp": [ - { - "accuracy": 0.9549485961661527, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9560327927925085, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9627555982258759, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.964662976072807, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9782137350228272, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9796446955419684, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9823896207503582, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9892954623424693, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9901412461982354, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9894461202141094, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9905970831107545, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.994464055684052, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.995153624454121, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9970301030767396, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9971556737590092, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9976553477922847, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9991923025135245, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.44.self_attn": [ - { - "accuracy": 0.9779063557323656, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9791595430456494, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9817942768139275, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9854031693876574, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9900620644667039, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9902243283577263, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.993903689772675, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9941680964915768, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9944853891226414, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9948279957385048, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9951596260315886, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9954364137405431, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9958761049875695, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9961881353029687, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9976057186355128, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9979170642204975, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9982206250087505, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9988328405459853, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9993145932523092, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.44.mlp": [ - { - "accuracy": 0.9542109068287046, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.955330751738266, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9620844890412531, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.964012287851227, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9778214636209764, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9792865773074722, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9820560082106998, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9891122145284164, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9899815005054208, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9892683826190861, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9904499136303601, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9943736235513106, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9950848510979038, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9969849285645116, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9971120251085315, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9976013712830057, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9991814657451438, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.45.self_attn": [ - { - "accuracy": 0.9732251690798684, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9741913978206483, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9766120217054298, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9819748395759809, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9870382192985792, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9871348942580976, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9919766536668727, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9920573471065023, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9918456870915466, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9928636885199108, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9931106808791427, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9938143129019361, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9939780135786063, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9944998123560493, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9964960827737263, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9971380406059325, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9973056524759158, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9985450457162713, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9992604759264443, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.45.mlp": [ - { - "accuracy": 0.9534773030563405, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9546125284151027, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.961370690284591, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9633095630101467, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.977477968376326, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9789571097904914, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9817204875871539, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9889247393431632, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9897957758144721, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9890964144693786, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.990276296868136, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9942748635321071, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9949840438601217, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9969147092442175, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9970535553967286, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.997549182661858, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9991326017096933, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.46.self_attn": [ - { - "accuracy": 0.9809021512046456, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9814630426760567, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9840353894978762, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9880042285903504, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9906709466227576, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.990859099614777, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.994219038770289, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9944620200212261, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9946980372585944, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9953874928484622, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9952258251999554, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9957216745242476, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9960560692033094, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9964413682341968, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9976627391910082, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9981281452407864, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9982403894261408, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9990206645820044, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9994563161545604, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.46.mlp": [ - { - "accuracy": 0.9606953307398056, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9616757409745141, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9674786180257797, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9691364939667677, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9809969547939928, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9822853370324561, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9846590607868213, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9905906744595421, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.991368295061157, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9907558671394853, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9917863266248452, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9951479880776453, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9957698101888558, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.997378358666442, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9974955666663223, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9979342561533773, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9992370285847421, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.47.self_attn": [ - { - "accuracy": 0.9752850036479925, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9761783287517334, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.979410578937907, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9844614517335829, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.987965398762179, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9880824035621787, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9924183067571568, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9925409366719817, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9933876615556839, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9938117941155246, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9940182702174705, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9943789015279004, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9947094416098767, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9953048815705666, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9970441615258, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9975516698935902, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9978255694119358, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9987108412529587, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9993998407230001, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.47.mlp": [ - { - "accuracy": 0.9609388120864567, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9619093040298474, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9680912527990968, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9697874672710896, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9812391342007016, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9824671829609495, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9849279516033436, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9907252042995471, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9914622985463786, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9908595959361839, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9918576889475318, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9952045411214625, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9958076977269038, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9974222173117787, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9975336645300942, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9979948004903761, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9992872270920902, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.48.self_attn": [ - { - "accuracy": 0.9754408506774589, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9763146054587866, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9786890541742507, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9832116161522112, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9877053778618574, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9878120344426287, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9919684451122425, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.992075766951434, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9923531104271349, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9929600983956143, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9936710186057577, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9938637112561417, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9941407775408343, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9947467887847635, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9966768880227679, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9972393820001009, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9973957525063796, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9986075421758486, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9992801064662146, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.48.mlp": [ - { - "accuracy": 0.9597660126654726, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9607615259132887, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9668447317457513, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9685493671384297, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9805870386036603, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9818632886990121, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9843141038068816, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9904128491731459, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9911700818981779, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9905369169274835, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9915721217464459, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9950187461646763, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9956396768408778, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9972861164715141, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9973658834044871, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9977976122356363, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9990114149741681, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.49.self_attn": [ - { - "accuracy": 0.9792942539053527, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9799596787381329, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9839871171861887, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9873687844527396, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9901040313814423, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9898505879222954, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9930741806808663, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9926162257692531, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9945450129330551, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9949456634563639, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9941343557775805, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9952966939584401, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9945612339498965, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9961538017408824, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9973388526797, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9978085001501696, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9977702011452302, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9989160866825841, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9992990036798323, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.49.mlp": [ - { - "accuracy": 0.9596173349571855, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9606053360590809, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9666440549649691, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9683674078826842, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9804678935263502, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.981778632751421, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9842474710098222, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9903742080358299, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9911599093803057, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9905142339044496, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.991576397252318, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9950252489284858, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9956623409923754, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9973262686123091, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9974394364674625, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9978906543815116, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9992545002452587, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.50.self_attn": [ - { - "accuracy": 0.9749343694236717, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9758063415065408, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9776645799804675, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9815044089367515, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9887560218278515, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9887683535926044, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.992852785894157, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9929406013535825, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9928209282898981, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.993746012473773, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.993889941515303, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9938867668572225, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9943207838621578, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9948586566545266, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9968457060106295, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9973492849691722, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9974677664791479, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9986919519601782, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9992794936835324, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.50.mlp": [ - { - "accuracy": 0.9593864834230197, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9603918627100556, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9664256296267635, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9681369770122201, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9803597823667684, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9816469862861069, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9841001931283819, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.99027987772991, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9910534485978516, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9904328763337904, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9914702893921027, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9949529880020571, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9955777052520333, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9972384536920703, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.997331456342516, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9977588379559549, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9989858896802425, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.51.self_attn": [ - { - "accuracy": 0.9767883061676433, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9776730609959677, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9800923878425046, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9845930247224475, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9883853704423496, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9885073582837848, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9925742856177845, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9926950598233625, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9931846569831434, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9931014473795107, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9941967114757159, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9944727178663015, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9948360725967703, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9951578382231099, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9970273599886385, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9974859247082158, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9977314295063383, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9987231146568727, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9993300205203827, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.51.mlp": [ - { - "accuracy": 0.9586140152655149, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9596309551086865, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9657083440964159, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9674461057507678, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9799463917550287, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9812798321149067, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9837762296297833, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9900963203246264, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9908918369267332, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9902401071679043, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9913199893934163, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9948815995043045, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9955299914397887, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.997247823348612, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9973695350038868, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9978274586487954, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9992411635987657, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.52.self_attn": [ - { - "accuracy": 0.981209423491045, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9823150373014965, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9844379955412526, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9874632562706737, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9913198578563568, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9915326666822167, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.994615727275806, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9949746528076694, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9951414435636252, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9953097939295205, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9955767613320955, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9960371207960538, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9960770480040657, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9966595794172272, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9978970426355341, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9981618699139768, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9984218125893293, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9989452982676754, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9993941331064745, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.52.mlp": [ - { - "accuracy": 0.9579851408734134, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9590255693581543, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9651084846179736, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9668606480485514, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9796070047213059, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9809790204622244, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9834844657268963, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9899307725166804, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9907559811039582, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.990090112177361, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9911971110605488, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9947988523145843, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9954641882918382, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9972010156825969, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9973239656108873, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9977774500761083, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9992297820556958, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.53.self_attn": [ - { - "accuracy": 0.977413419192951, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9782360120627441, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9805683773991308, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9847101009892005, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9888113891509803, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9888311599528319, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9929440567564023, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9930253172979543, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9934868722614881, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9939290937281361, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9941466163451734, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9946702127344906, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9948784552980214, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9952619192067926, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9970928397960961, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9975484455384216, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9977977727784922, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9987509739156323, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9992999458275, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.53.mlp": [ - { - "accuracy": 0.9573942191506687, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9584578840356124, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9645117231105503, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9662671783252766, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9793135299788494, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9806920514491043, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9831841678701734, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9897701003049549, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9905951182406983, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.989946434985062, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9910489760682379, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9947154190832455, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9953821359055215, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9971417677221134, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9972798773271375, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9977306639040379, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9991840054985675, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.54.self_attn": [ - { - "accuracy": 0.9767079025899109, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.977352696912069, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9803887400776148, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9844880158473787, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9887046492903641, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.988605701119492, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9925596359136858, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9924324596496789, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9931573989359956, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9936052711533481, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9941660206470835, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9943045353958089, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9945753059153887, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9952540259416166, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9970159682224652, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9975190142745545, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9976459243641186, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9987530070571474, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9993574281351788, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.54.mlp": [ - { - "accuracy": 0.9570997731858178, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.958189643821434, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9643404633787117, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.966088586535893, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9792251544269291, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9805953376191227, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9830968254000733, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9897433068162125, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9905577291930584, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9899091602205053, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9910092807718014, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.994701839517802, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9953642795475102, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9971471310372612, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9972773203258648, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9977361347991973, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.99921287859692, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.55.self_attn": [ - { - "accuracy": 0.9695573284437782, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9706059746248158, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9745910008879084, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9787789012648558, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9863057629646439, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9862325709607256, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9910277604664627, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9908925748283142, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9913873585480216, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.992780896026249, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9929428020198094, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9937195450704741, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9940287008832552, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9945443015251505, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9964534278934527, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9970552837721219, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.997298239950875, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9984264786257163, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9991799524838203, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.55.mlp": [ - { - "accuracy": 0.9564215263449832, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9575268669348014, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9637949808843826, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9655832953162884, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9788988244376684, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9802926490573507, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9828532760668742, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9895977142855132, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9904243124305809, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.989760276371319, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.99088428184194, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9946291431245443, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9953035508576584, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.997110079569546, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9972396320908478, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9977000763419231, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9992050519519436, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.56.self_attn": [ - { - "accuracy": 0.9767964243104583, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9780962638753025, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9818783348524257, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9864068819994205, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.98884237567453, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9889860269858649, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9928799899736125, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9930204350844418, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9934806713687354, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.993710294267849, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.994419574198362, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9947371815796942, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.995284918131993, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9956321344759903, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9972966810823173, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9976747798132956, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9978316927291943, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9987880232419182, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9993698959771887, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.56.mlp": [ - { - "accuracy": 0.9558481726991503, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9569580902002359, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9633891036439883, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.965219681690398, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9786280024012453, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9800240504800489, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9826414382673407, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9894521661641958, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9902796743584699, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9896197290845999, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9907411572180296, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.994550685716891, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9952269034876832, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9970703800086325, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9972015581289796, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9976796817071246, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.999199894995208, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.57.self_attn": [ - { - "accuracy": 0.9743773425488096, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9754740736006122, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9786829514625041, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9835504031691112, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9868463312128657, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9869135971132078, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9909548793282164, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9910295588503543, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9923328108546373, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9930325874470567, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9934651316633742, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9940287025008154, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.994318326224426, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9948375212322724, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9966046582221201, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9972466423764432, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9972764880543477, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9985492323512996, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9991909698981448, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.57.mlp": [ - { - "accuracy": 0.9547058806607598, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9558582782353225, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9623701368507586, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9642343854433612, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9780857327737307, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9795207473400392, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9821747248796256, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.989193785420962, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9900363060694776, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9893647772505095, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9905154847441927, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9944241215033751, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9951155016345805, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9970032537454053, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9971379352194306, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9976202566115382, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9991821122178073, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.58.self_attn": [ - { - "accuracy": 0.9737724869659072, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9747093338519335, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9788968468477067, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9836884381641683, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9878376387549859, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9879029724550875, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9924093013335216, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9924235558137298, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9931599753173558, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9935815009582591, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9939732816021302, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.994340229583414, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9947158720368814, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9952673047388855, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9970020160912291, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9973968264928675, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9977400327701808, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9986150747189593, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9993064987479316, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.58.mlp": [ - { - "accuracy": 0.9542580380251533, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9554184122305167, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9620072391668433, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9639005426709589, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9778304962735427, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9793003864684388, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9819949630059694, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9890384519668786, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9899067947837082, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9892315054312348, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9904005098842869, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9943304753764287, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9950274285909376, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9969021385952243, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9970676992823811, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.997551432822978, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.999045616447754, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.59.self_attn": [ - { - "accuracy": 0.9712702203541994, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9721021765941068, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9754365418890589, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9804669231862614, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9852343858464768, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9851466194285374, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9897960061452499, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9896332849993518, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9908737215262494, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9918640852954826, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9922076034800786, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9930729607463276, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9927911491770494, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9940699757517952, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9958033907276235, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9969043544199514, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9964993745333662, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9983849236251492, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9991502591996992, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.59.mlp": [ - { - "accuracy": 0.9538481580583673, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9550095816191874, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.961678990211926, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9635978259734417, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9776741023126402, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9791287534723156, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9818718706404692, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.988947513562284, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9898140834350335, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9891379093261141, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9903134481344176, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9943047307774817, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9950090328991217, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9969281288672631, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9970736266358903, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9975771633663068, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9991450566908737, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.60.self_attn": [ - { - "accuracy": 0.9632178096982994, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9642687826172301, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9687497295243175, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9750961993673914, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9825188765106233, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9825517327377671, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9895424155126277, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9894959923419121, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9903817095695749, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9910479694310772, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9914316513183478, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.991957394185623, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9927268028259277, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.993434868630414, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9960005765315145, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.996605489035382, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9972485684740701, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9981698036487949, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9991923777637758, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.60.mlp": [ - { - "accuracy": 0.9526961500707426, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9538832818599123, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9607785510781565, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9627786443421715, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9771317028881688, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9786401083692908, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9814617593135488, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9886611446640209, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9895684252934236, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9888815394063529, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9900866477799258, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9941655869203571, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9948926279566398, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9968500293815803, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9969858548138291, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9975077604115206, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9990656161435733, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.61.self_attn": [ - { - "accuracy": 0.9672852914388242, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9682781120860263, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.972305819980408, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9783581487932488, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9837793199541537, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9838296662721979, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9899773480321624, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9900009994100976, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9913282843894864, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9916984190135018, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9919309408747052, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9924834521340304, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9929587760646092, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9936228363382581, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9959862789653829, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9966543358750641, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9970445763442273, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9982418148337227, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9991942918430524, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.61.mlp": [ - { - "accuracy": 0.9513919782873831, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9526080787181854, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9597463167615627, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.961830706776757, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9765271721897941, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9780579966149832, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9809968312221923, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.988285376837379, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.989216367812141, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9885388165712357, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9897617330823681, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9939339849607725, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9946605057798719, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9966051727655882, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9968089320280246, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9973343170958718, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.998754778441875, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.62.self_attn": [ - { - "accuracy": 0.9784874170038261, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9792156392512353, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9817912991305715, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9859864704408928, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9894568890725312, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9894249134353901, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9932690836048048, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.993272415288773, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9936403912050944, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9942105560024318, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9944749024059427, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9948948650057182, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9951105748278726, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.995702071335951, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9972667191912862, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9977144489714288, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9979060608771091, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.998798462213017, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9993914919476466, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.62.mlp": [ - { - "accuracy": 0.9636034314569674, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9645018167793751, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9696953480965212, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.971229188144207, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9823819920420647, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9835605329196704, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9857201417417902, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9912665834601381, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9919898066130516, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9914383787269655, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9923956533158688, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9954990034334754, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9960715241338077, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9975570214875532, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9976805124162255, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9980772994029777, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.999299493468195, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.63.self_attn": [ - { - "accuracy": 0.9723501397590888, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9734348999826532, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.977477245817059, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9815791336035258, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9871476897479672, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9872221523209622, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9914867954181605, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9914452568382809, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9920577069646433, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.99311953187479, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9935985210008527, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9937861645594239, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.994096733801263, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9950900046694043, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9967380830969074, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9972622749751041, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9975013753068388, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9985262246398059, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9992291452179319, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.63.mlp": [ - { - "accuracy": 0.9629733223271997, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9638972064774287, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9692545422401867, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.970837771010242, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.982099387990801, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9832999869985016, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9855315310782508, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9911420653623185, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9918773789694043, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9913196350006681, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9922980234811181, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.995444756197302, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9960280552653498, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9975339172187409, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9976570563219291, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9980528246623611, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9992987626219952, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.64.self_attn": [ - { - "accuracy": 0.9785141545396886, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9798406986029524, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9828692878057298, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9871545031568721, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9897625249516415, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9898107111404993, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9934219277316803, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9934231701650118, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.993519228147833, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9942103042267263, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9948697003633961, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9951931912402966, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9955743013660571, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.995985255919789, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9974561196898943, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9978128851170799, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9979448283679391, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.998829221162994, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9993768935168382, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.64.mlp": [ - { - "accuracy": 0.9625901986697787, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9635105693810865, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9690566584467888, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9706816238988387, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9819225410587693, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9831081809672085, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9854046120553425, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.991057457426857, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9917761949136069, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.991230225827741, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9921854130286527, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9953912544725952, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9959664923800645, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9975077122584671, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9976272682818633, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.998047119178074, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9992977117623301, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.65.self_attn": [ - { - "accuracy": 0.9776632769131347, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9786358171779859, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.981327813559849, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9856183993581095, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9891907743325359, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9892218817319525, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9931526480389661, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9932103135966157, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9930834802308757, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.993933751934061, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9943865667538423, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9948136840309751, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9951249978897211, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9956111768015513, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9971433249669835, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9976201213246799, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.997762152238896, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9987377690359656, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9993406579443743, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.65.mlp": [ - { - "accuracy": 0.9618697797781542, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9628221151468, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9684166103405387, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9700627238734773, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.98159493599087, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9827925964797798, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9851128462407934, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9909031890136631, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9916338831382362, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9910858357570281, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9920554748785338, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9953175759506657, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9958990768063813, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9974678703951404, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9975914019956499, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9980149191412094, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9992883663263025, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.66.self_attn": [ - { - "accuracy": 0.9773089547494525, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9780627636140898, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9817865015938878, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.985988453667807, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9894247536890601, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9893826215871071, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9935676899836644, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9935102447818377, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9937792288776683, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9945045016019752, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9947496445751504, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9951691699679941, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9954800644497338, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9960039953676689, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9974902188834293, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9978649667402926, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9981677699160125, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9988312537653224, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9993649740859646, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.66.mlp": [ - { - "accuracy": 0.9615259545021936, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9624900622783523, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9681338097311949, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9698057089393076, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9814014081892214, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9826189491799787, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9849750560365225, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9907742124541026, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9915260534831568, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9909739927633813, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9919608428486084, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9952313322206273, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9958169197810716, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.997363716118822, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9974898651279019, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9978997030136126, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9990507300520365, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.67.self_attn": [ - { - "accuracy": 0.9768930749085388, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9774137850066549, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9803476738988569, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9842088722476834, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9882315704108853, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9881131801460135, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.99211464454665, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9920057199013076, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9922329738098932, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9932640204696279, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9938213143889841, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9944608831091931, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9943598229438066, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9953004097884619, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9970391134878522, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.997446883665888, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9976857747625265, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9985806936165318, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9992856663026798, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.67.mlp": [ - { - "accuracy": 0.9613453691923305, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9623094220694742, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9679629340963929, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9696391362108683, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9813316163459891, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9825487219679513, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9849216533138564, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.990747979523516, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9914925822645033, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.990940498139121, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9919277800639209, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9952351823571677, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9958286485144574, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.997416792731536, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9975430979025796, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9979800550745016, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9992509476050098, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.68.self_attn": [ - { - "accuracy": 0.9698149329541546, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9705776387923643, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9748122885235047, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9795750610922512, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9856482651084661, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9858217686904889, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.991249151797475, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.991209947798205, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9921143380189804, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9928954670107678, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9929952384462875, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9934534828521704, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9940387967385744, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9946994889821661, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9967781981379774, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9972333931976831, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9977683004200164, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9984564835475268, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9993413814043283, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.68.mlp": [ - { - "accuracy": 0.960607956506704, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9616011935041139, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9673782201777947, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9691006413807994, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9809870142489672, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9822437408821363, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9846506975591183, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9905396637653834, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9913271095260585, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9907676238125485, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9917825229563996, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9951473870559743, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9957561809324512, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9973626996537572, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.997483311894987, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9979311214318793, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9991897444093698, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.69.self_attn": [ - { - "accuracy": 0.9746434128794231, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9754299470468571, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9788573722011948, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9833640443945402, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9874723571793813, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9873557428976423, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9924810771144142, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9923179300599977, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9933061991377097, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9938008602904645, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9937182100930888, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9942196897770229, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9945870306293824, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9951544373026607, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.997013877342014, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9974717908220267, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9978794769543272, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9986577374695212, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9993503351211793, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.69.mlp": [ - { - "accuracy": 0.9597402405189841, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9607536793735466, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9666731048767504, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9684557002037764, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9805854769717706, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.981853283353542, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.984341058193853, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.990310158948169, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9911022696791119, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9905565445507435, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9915827226236855, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9950000145717671, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.995607190691915, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9972051087717869, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9972674844811034, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9976960341875, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9987532903140067, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.70.self_attn": [ - { - "accuracy": 0.9700776635620155, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9712770919462568, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9748408221884778, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9805623726723226, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9857298272514814, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9858565788323942, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9917454095802417, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9918886699076546, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9922813948145822, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9926341593412584, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9929776136567326, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9933426205902115, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9938355909513408, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.994340874328229, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9965434933792016, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9970144054876935, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9975682205496061, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9984187751971675, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9993055079003649, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.70.mlp": [ - { - "accuracy": 0.9600840636381978, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9610625803470612, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9670844380008548, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9688118616805265, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9807038043478602, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9819793068852863, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9844479199106756, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9904503134618464, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9912156549674508, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9906239544629658, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9916397723810453, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9950516127344025, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9956617640625489, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9972963068315661, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9974516291587957, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9979092180300014, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9992083441864356, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.71.self_attn": [ - { - "accuracy": 0.9716005627261964, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9727827717403048, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9761530561862808, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9817176121041963, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9860314845940784, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9861663240253141, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9915327458446356, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.991644443012774, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9923807068874961, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9930092201715237, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9933390363217577, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9937268422769481, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9942672354855427, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9946471124299263, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9967995606068718, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9972174589362878, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9977684921499244, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9984512756897235, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9993381686294206, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.71.mlp": [ - { - "accuracy": 0.9602212309837341, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9612056297299109, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.967132432680381, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.968833322783834, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9808013268109215, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9820634622224852, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9844840320415402, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9905082541027743, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9912697556183526, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9906827411124188, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9917009917185887, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9951060891004377, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.995717230877888, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.997353063026247, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9974759495939667, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9979163864872566, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9992336512292049, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.72.self_attn": [ - { - "accuracy": 0.9702006431394502, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9712622623498502, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9749237088192451, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9805932933170545, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9854316107536617, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9855653615178246, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9912462882746599, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9914017805192423, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9919668260079465, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9929162833447519, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9930556258431783, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9934822284175377, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9940990796557775, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9945587231777608, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9965337064043668, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9971026092351071, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9975242250917578, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.998442200787586, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9992308824408898, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.72.mlp": [ - { - "accuracy": 0.95967381506374, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9606809670987883, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9665950250468756, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9683086510355535, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9804914738786848, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.981805511211094, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9842458259020197, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.990377700912129, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9911696535878276, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9905574503844898, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9916127567228518, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9950484740057666, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9956803248925624, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9973278756645557, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9974552834234935, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.997884820362455, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9992425645560663, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.73.self_attn": [ - { - "accuracy": 0.9753982657076496, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9763727565541079, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9794563608930299, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9842119615426973, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9884018170598307, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9885559247708634, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9928900958038867, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9931206756124371, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.993509787379911, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9937135519606894, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9941914484247958, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9945572576191473, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9950018908558903, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9954052122407838, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9972165114074749, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9976376397412663, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9979730295507532, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9987333284072136, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9994103102868203, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.73.mlp": [ - { - "accuracy": 0.9596491912870031, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9606393663898894, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9666415584322653, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9683804643389425, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9804882696388584, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9817797921990093, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9842330622241685, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9903333939268792, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9911291695101873, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9905322927393412, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9915681597041456, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9950274815782905, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9956459903501367, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9972996293003425, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9974405168629202, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.997891428353461, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9992161106906439, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.74.self_attn": [ - { - "accuracy": 0.9658851522560182, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9669471736017027, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9706414316437746, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9770519280512082, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9834767417295983, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9835267164499352, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9897688461320573, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9898223858081588, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9904596511470644, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9910179126370502, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9917462690439272, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9923048099061769, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9927262543259483, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9932857144771046, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9957237140961775, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9964946757658923, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9968064884091482, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.998222872283996, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.999045778497882, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.74.mlp": [ - { - "accuracy": 0.958424586998789, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.959424050230729, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9656293651971378, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9673778855879056, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9798788960630956, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9812297273525282, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9837689756562835, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9901173379713375, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9909144763981825, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9902452801746365, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9913277081949147, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9948827966704572, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9955306564653782, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9972521725453829, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9973671193063063, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9978202031802779, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9992292272784796, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.75.self_attn": [ - { - "accuracy": 0.9774211833351537, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9782799936732963, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9814807661367875, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.985374618615759, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9891148013013759, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.989163166010066, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9936793799217987, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.993695976727299, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9940041102419951, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9945198826148713, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9945071885411284, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9948924183722978, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9954054727660198, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9957618466700966, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9971650137786606, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9977147371177316, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9977739776215074, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9988418766144221, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9992906042944493, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.75.mlp": [ - { - "accuracy": 0.9580646500383553, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9590505010595447, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9653112083477409, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.967084684654286, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9796554907960328, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9810272941954041, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9836038308414189, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9900182270395913, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9908467854471191, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9901619205055269, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9912599892306485, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.994844092595342, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9955015996789658, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9972357956545526, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9973488898555699, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9978009555672639, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9992226127988512, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.76.self_attn": [ - { - "accuracy": 0.982124107840814, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9829511115033376, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9851966799776021, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9882744733538282, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9913041669091112, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9915502946823835, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9945004965227685, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9947388092497069, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9948682376477671, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9951645333908106, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9954547425241846, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.995762405292082, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9961345334243226, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9963940196229439, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9976979309362114, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9980797026992628, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9981361952009856, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9990184806428213, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9994538378638314, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.76.mlp": [ - { - "accuracy": 0.9579177890953264, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9589071942395285, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9651007986578503, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9668365862024457, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9795605144218394, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9809323557603516, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9834878420653311, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9899848041761863, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9907909359194731, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9900854221839261, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9911910987361089, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9948043809027264, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9954626974404642, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9972172247760586, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9973266773832667, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9977802033168509, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9992237107086878, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.77.self_attn": [ - { - "accuracy": 0.9745084073786673, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9756905180647185, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9791299573783028, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9846783805834619, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9876854461862853, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9878425227086011, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9927856712356994, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9930261851680514, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9936042231948752, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.994025786162207, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9940490578056166, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9944221571602515, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9949957351200283, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9953411911173087, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.997148198002067, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9975951463290441, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.997984487728804, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9987067720748631, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9993424334618459, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.77.mlp": [ - { - "accuracy": 0.95800853501025, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9589825121587828, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9651356577677161, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9668648189031764, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9795662206842711, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9809494138646283, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9834818704739997, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9899438858816498, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9907649112130074, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9900551419705153, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9911604336413898, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9947563443136843, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9954109859995937, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9971285258468828, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9970563851223376, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9974591873929297, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9985485624914107, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.78.self_attn": [ - { - "accuracy": 0.9831317381835297, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9837318143170131, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9859482199443799, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9888390343342173, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9919394635791449, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9920791600781836, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9951646122836361, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9952452301758489, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9956087053410316, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9958365968502078, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9959981970758619, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9962267690142127, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9965526823346552, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9967932491011819, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9980028830652469, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9982816183535186, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9985132484917382, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.999085142948101, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9995369989498469, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.78.mlp": [ - { - "accuracy": 0.9686130627401566, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9693476739094445, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9740836353678453, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9754914364924556, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9848056752234697, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9858365062330114, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9878035610737769, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9923784406365532, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9930289487207407, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9925468673341369, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9933776871399268, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9960730875425629, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9965708242410696, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.99783774736754, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9979612132914266, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9983439846944652, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9992957727609839, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.79.self_attn": [ - { - "accuracy": 0.9839585832761306, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9842948924357954, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9861968868951264, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9890670234729585, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9920464684990676, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9921245580097955, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9948989485978688, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9950236090164828, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9954726717336789, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9957793578995686, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9960429809595409, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9964373543441883, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9966924534988051, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9969656727840438, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9981159590687113, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9984057582608473, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9986475033373082, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9991070533655338, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9995531993244138, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.79.mlp": [ - { - "accuracy": 0.9686327504092141, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9693648078920025, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9741230599190059, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9755258030797306, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9848353670615899, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9858618102673637, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9878325801538793, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9924155830307618, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9930694145150483, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9925754035479928, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9934083340493473, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9961058856478255, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9966074135522113, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9978880577009955, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9979882285207216, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9983651489080665, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9993611413764915, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.80.self_attn": [ - { - "accuracy": 0.9830739413829226, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9835871728137136, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9858511358891663, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9886484503206846, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9913256482074135, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9914191474549865, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9943144708185604, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9944620916472846, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9950876101655396, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9955956252708443, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9958234033547342, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9961853162017896, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.996478104375695, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9967872420889571, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9980106928177472, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9983043121909233, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9985178866987362, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9990868340491464, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9994907594610642, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.80.mlp": [ - { - "accuracy": 0.9684586053420055, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9691959884213773, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9739730369887853, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9753678422421217, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9847184835786098, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9857828514160294, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9877585764288118, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9923798946026516, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9930501280009354, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9925187525192374, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9933878404431438, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9960823019133195, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9966004379951444, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9978796701424902, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9979812481538638, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.998349435602952, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9993771559093148, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.81.self_attn": [ - { - "accuracy": 0.9852870575299388, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9857414652054247, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.987789069498448, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.990418513697621, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9928620715723618, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9929482396446953, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9955328773143456, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9956466906802043, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9958512983407433, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9960553942150191, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9964408081171936, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9966132901363859, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9968968252283766, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9971645602000583, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9982559181455719, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9985331746161377, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9986620195773676, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.999205223296916, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9995736895162783, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.81.mlp": [ - { - "accuracy": 0.9685598105976456, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.969297223185238, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.974111942084212, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9755060449242592, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9847793614393786, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9858139625407363, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9877794693273149, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9923918056301773, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9930464691531501, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.992551460175922, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9933834616584998, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9960909960056213, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9965876131195968, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9978671842132156, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9979894736622411, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9983682807436899, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.999365209004425, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.82.self_attn": [ - { - "accuracy": 0.9795969171743644, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9802538705009379, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9829632624689686, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9862524025436294, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9899979424172718, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9899138592085556, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.993484672281499, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9935678514700971, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9939560204940406, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9942416747982957, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9949203152332062, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9952767193072328, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9955373360211716, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9959899618069789, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9973737645992323, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.997792223272355, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9979245837986175, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9989063642477911, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9993351420608202, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.82.mlp": [ - { - "accuracy": 0.9680046550929546, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9687574492081216, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9736283779340354, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9750069209227437, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9845375826974448, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.985581859750183, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9875791046866461, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9923433253639623, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9929765854275933, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9924530377366433, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9932977073384743, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9960531965020651, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9965555399999415, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9978743929134094, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9979668870310936, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9983303443173339, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9993799422702164, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.83.self_attn": [ - { - "accuracy": 0.9860416863015607, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9866805060913688, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9886731561273336, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9908229524612819, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9934105936094726, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9934837453704524, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9960753710352277, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9960556541397971, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9962999442298162, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9964864021534786, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9965120439025524, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9967958895783675, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9970179257889915, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9973920784388227, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9981705282606479, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9985559421015511, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9984999259835795, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9992343780345676, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9994470653687849, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.83.mlp": [ - { - "accuracy": 0.9681064819819049, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9688365055542243, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9736715265010533, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9750425768525977, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9845334498309776, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9856021958158204, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9875802505565318, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9923483165177075, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9930039789086502, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9924596599303186, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9933203284040486, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9960576854523664, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9965677451783497, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9978778166696429, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9979685405759435, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9983270088468041, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9993803393138622, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.84.self_attn": [ - { - "accuracy": 0.9888508732087518, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9892432714665407, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9908053679391742, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9925294970162213, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9945164334617163, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9945143674941439, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9963659787379009, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9963858902797496, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9966423784284607, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9970030257061712, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9971114994320822, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9972776924014876, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9974576614086369, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9976803496982413, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9984680866871617, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9987414665949089, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9987087555376715, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9993542001895165, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9995903496930719, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.84.mlp": [ - { - "accuracy": 0.9681670771617639, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.968899235913628, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9736265702859351, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9749523701244279, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9845694217242693, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9856149533665494, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9875545068889072, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9923867281085175, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.993004123851853, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9924771908768698, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9933124888247173, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9960665630519783, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9965635883425804, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9978874886261397, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9979743821374876, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9983224256674906, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9993842643820435, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.85.self_attn": [ - { - "accuracy": 0.9834605685778355, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9840068226974261, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9864515139427232, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9898155513219535, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9920050456972891, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9919920046568701, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.995031622128169, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9951582109727162, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.995682460059853, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9959483084906089, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9958179056717965, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9961976645955521, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9964062950304268, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9969929919982525, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9981485347839465, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9984179374296218, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9986533245598701, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9991091186009151, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9995347940268019, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.85.mlp": [ - { - "accuracy": 0.9683751934453061, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9691060805007031, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9737499343525422, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9750481253783954, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9846380517670983, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9856953443469185, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9875904729981956, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9923896012141517, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.993022063575489, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9924772931507936, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9933257138748702, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9960538966483191, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9965572997094377, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9978484337855327, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9978313529496327, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9981445772695894, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9990042996942066, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.86.self_attn": [ - { - "accuracy": 0.9893789437697514, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9897087601895788, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9911707663222363, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9928744402468989, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9949746674392372, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.995003640909042, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9968424622508648, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.996884660194873, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9970869345982608, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9973371445666999, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9974588037247917, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9976275418517425, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9978567636571825, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.998004170153045, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9986781838080404, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9988607162621951, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9989331915796931, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9993597719922515, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9995755278735152, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.86.mlp": [ - { - "accuracy": 0.9685414952078932, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9692632489298519, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9738814395509268, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9751558986149336, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9847008379078225, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9857409825352462, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9876255395479108, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9924642319889053, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9930773569074901, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9925267678734503, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9933641175739467, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9960768965293506, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9965682428109607, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9978596738686687, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9978378190303614, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9981439579094417, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9989999709987866, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.87.self_attn": [ - { - "accuracy": 0.9895009518551984, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9906048335036949, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9917315720139366, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9927964451918868, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9950606181580377, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9950352239814636, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9967942052937456, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9968989958717046, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9970570969959035, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.997208540830271, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9975411808608394, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9976891737774407, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9979490266169274, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9981257125526961, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9987929412776506, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.999013054552243, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9989893733352226, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9994817680853885, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9996686048735252, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.87.mlp": [ - { - "accuracy": 0.9684928063499301, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9691935759037733, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9737774429744795, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9750413645647074, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.984653607746096, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9856992115413672, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9875766207022887, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9924468475786087, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9930621351752627, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9925119273217493, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9933485090242404, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9960841661637747, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9965840261797175, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9979027043108976, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9979809646886822, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9983146791713998, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9993862434945952, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.88.self_attn": [ - { - "accuracy": 0.9911395395664793, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.991504790923117, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9926836922715762, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9941978513900387, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9955188610689029, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9955450809609733, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9969361829203799, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9969792163465172, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9969166158575957, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.997197949155969, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9975120194109255, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9975722089691675, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9976951810715132, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9978799589934122, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.998643774428944, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9988420613655379, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9988243529744643, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9994488473764106, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9996152825907828, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.88.mlp": [ - { - "accuracy": 0.9684562566445062, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9691550003266648, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9736918228629389, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9749449767956608, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9846198399502196, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9856761089575133, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9875471279221145, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9924331912280697, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9930557379695145, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9924902801009777, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9933397239319196, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9960756544146294, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9965807835841062, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9979026296641678, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9979779529569083, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9983057534003532, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9993939547260341, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.89.self_attn": [ - { - "accuracy": 0.9895270110941247, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9897486816993669, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9915735714725757, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9930772363502336, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9953276954689309, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.99536451479224, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9970936454759028, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9971711570397019, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9969084737332243, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9972084490582347, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9975721441503418, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9976831523787328, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9978339416955254, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9981152082389024, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9987857506248953, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9989065935982293, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9990363822680114, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.999390812439072, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9995883270488488, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.89.mlp": [ - { - "accuracy": 0.9682095752734887, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9689129371392099, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9734805428275937, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.97474406904688, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.984496222956008, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9855524361819813, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9874457547833261, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9923936757994326, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9930093803099895, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9924499064095711, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9932939470500538, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9960541700282576, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9965557286295256, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9978884272154813, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9979610221558496, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9982899129880887, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9993687632959336, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.90.self_attn": [ - { - "accuracy": 0.9858395326392431, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9860707465559244, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9877107374517149, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9897372208437637, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.993195776460006, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9931557190996644, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9956569895393362, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9955990651720449, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9960856629094403, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9962158484727537, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.996430765641363, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9967580090007303, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9969099260204913, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9970913542385557, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9982408063339168, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9984900905432081, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9986386151040757, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9991910306883878, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9995470701817938, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.90.mlp": [ - { - "accuracy": 0.9680472832957381, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9687557499855757, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9732926321264944, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9745402591989228, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9844112964837175, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9854667068606144, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9873328641743252, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9923509293186822, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.992964649499443, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9924004258784024, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.993243674453544, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9960230143471179, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9965240179904198, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9978633045623275, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9978708356226745, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9981867026562166, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9991479462570216, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.91.self_attn": [ - { - "accuracy": 0.9915797431757184, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9918715139164737, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9929005194602436, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9941452625207603, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9961177537119702, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9961309238753625, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9974906738651427, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9974905459063226, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9975304477725571, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9977571300757853, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9978338618591232, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.997974144797282, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.998037149235116, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9981856232288441, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9988737498673512, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9990221949288083, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9990470608984316, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9995096117582809, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.999664582033323, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.91.mlp": [ - { - "accuracy": 0.9680357068581016, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9687387997186497, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9732292573899031, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9744667173608353, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9844056266409001, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9854484146068755, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9873102101448336, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9923667100650307, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9929682784223635, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9924166684382056, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9932456808654886, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9960332633682379, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.996527718049229, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9978841680261356, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9979547661802682, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9982802766730616, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9993863701069801, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.92.self_attn": [ - { - "accuracy": 0.9909374225198438, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9912181844395634, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9924937127599198, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9939977304740367, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9957807335610452, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.99582037471823, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9975125317976467, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.997553145335252, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9973628569247299, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.997513719215548, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.997929551595773, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9980338196037337, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9982316008474874, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9983063455989682, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9989318647391261, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.999050513700288, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9991923102949697, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9994514393191295, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9996357509563064, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.92.mlp": [ - { - "accuracy": 0.9678827855539949, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9685889520731411, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9731013014128334, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9743507689747372, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9843226460445869, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9853726398493898, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9872551253065467, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9923322882111135, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.992946614707379, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9923834564948553, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9932253548973485, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9960161115925171, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9965188003537294, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9978761905559191, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9979476175136178, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9982744757343378, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9993921720520839, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.93.self_attn": [ - { - "accuracy": 0.9860100671648979, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9865851679228639, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9884208480484391, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9912963401967365, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9934570186673418, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9935030244339845, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.996171864840251, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9962099588255545, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9962957703974098, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9964825718933227, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9966873699051672, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.996852044579818, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9971182220676718, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.99734129567361, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9983546447842137, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9985915210655596, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9987331653246656, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9992554348565662, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9996018328039147, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.93.mlp": [ - { - "accuracy": 0.967637451757726, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9683554107813459, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9728505279084569, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9740934647913826, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9841989949345589, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9852561441397196, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9871332263083834, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9922524107326018, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9928709013681662, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9923032620725664, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9931518357051047, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9959770158840049, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9964825976272359, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9978538382197976, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9979278889827823, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9982556943034165, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9993783086262594, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.94.self_attn": [ - { - "accuracy": 0.9918074437270039, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9918727591682813, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9927852015305114, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9937371960841119, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9956357103012698, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.995576238958165, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.997554463505941, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.997543257367062, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9978438069198379, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9979246441875339, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9979903425004235, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9982210040509113, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9983430956551609, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9984794302635189, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9990311269587102, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.999184956596429, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9992209518187385, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9995532757449453, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9997082698307502, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.94.mlp": [ - { - "accuracy": 0.9722169322991058, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9728399000473713, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9767590645504626, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9778607989612379, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9864518025791958, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9873801924680409, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9890101439270534, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9933415404520929, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9938922082319072, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.993414648612471, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9941523547137254, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9965434119987645, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.996985007800456, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9981400011916106, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9982107413418003, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9984988466236054, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9994429129384164, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.95.self_attn": [ - { - "accuracy": 0.9931165376482042, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9933478464372456, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9942281430244053, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9952675363441047, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.996316272912449, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.996352145043937, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9973783845351519, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9973711910363483, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9974801050572607, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9977284529442457, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9980260289494732, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9980683056515103, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9981658452092425, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.998330411213254, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.998902296803671, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9990898407658709, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.999028440626142, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9995584360654711, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9996699441689998, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.95.mlp": [ - { - "accuracy": 0.9723062733874509, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9729386244557405, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9768055305375081, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9778893690458254, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9864762368072805, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9874238643893286, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9890371246361419, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9933600126961736, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9939228145062531, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9934220698564068, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9941757249180228, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.996548880443075, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9969971394901606, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9981447844643538, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9982156861009762, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9984970664433939, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9994512508197467, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.96.self_attn": [ - { - "accuracy": 0.9921447387896478, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9925206850625967, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.993824716755434, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9948737964181131, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9963011128068167, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9963517372471917, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9976521953450221, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9977586913277934, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9979740766636831, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9981118054190454, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9982489912010926, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9983473352254614, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9984961832003472, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9986152433312351, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9991003069659009, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9992128837835289, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.999278363894279, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9995266034077902, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9996587467566745, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.96.mlp": [ - { - "accuracy": 0.9723059237983666, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9729324707663373, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9768153387366941, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9779023141355107, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9864910288076651, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9874116944051102, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9890345305841612, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9933952956803536, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9939387109571773, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9934455075448281, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.994190612158395, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9965658593604243, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9970076070412209, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9981572438815707, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9982220299088543, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9984971973677411, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9994469979244872, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.97.self_attn": [ - { - "accuracy": 0.9893234109653062, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9895140118161706, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9908460756077578, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9922798598479283, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9948811750172785, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9948197360959297, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9966170706198012, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9966088962545129, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9969785542742006, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9971636228483954, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9974109761135065, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9975161199752045, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9976600358863116, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9978726538706963, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9986224417991348, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9988603113605151, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.998882072521251, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9993619967778948, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9996189806245782, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.97.mlp": [ - { - "accuracy": 0.9722248868722665, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9728489981866196, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9767108429223299, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.97778254840523, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9864540430472085, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9873683005081195, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9889668836503437, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9933784369526333, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9939104302565714, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9934305218048394, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9941575868471869, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.996544817536089, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9969769809456346, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9981346514970554, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9981525073342613, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9984215673941531, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9992687298524144, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.98.self_attn": [ - { - "accuracy": 0.9934138620370313, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9936582755640542, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9944696832497261, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9954045745176509, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9969884213917938, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9970207715554065, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.998007452489171, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9980737261510896, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9980496846531567, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9981834966946688, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9982558966822628, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9983922109975921, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9984165625033998, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9985549313592186, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9990608641160897, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9992117786957686, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9991831043122434, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9996046806229156, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9996980674240101, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.98.mlp": [ - { - "accuracy": 0.9722269697997131, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9728481472518883, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9766556820096938, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9777135240698331, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.986442248091886, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9873506661112371, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9889379253512934, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.993384454129754, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9939113060796732, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.99343357563607, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9941550121201497, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9965479594263199, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9969831934370297, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9981508443658975, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9982134170009216, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9984880160742537, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9994524454952242, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.99.self_attn": [ - { - "accuracy": 0.9933367210783457, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9936126206364286, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9945513924069115, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9956033179585478, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9968242123968115, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9968667799901021, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9980768329512916, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.998094582868936, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9980593747275538, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.998163037976299, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9984354347995433, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9985053151858186, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9986558079621509, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9987340872175992, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9991566445433387, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9992405033982849, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9993202160051289, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9995728600295383, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9996757588876215, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.99.mlp": [ - { - "accuracy": 0.9721097962832764, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9727402673544068, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9765679435314316, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.977635806132304, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9863947765215447, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9872999581459322, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9889057716599813, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9933759128233712, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9939052754841549, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9934271858318856, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9941537283287433, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9965535643093876, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9969865346976899, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9981564877885639, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9982203615976399, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9984910111157469, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9994576836210112, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.100.self_attn": [ - { - "accuracy": 0.9894482199053624, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9900129209027478, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9915262578104279, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9936406521837374, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9949683818451472, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9949841154861803, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9970154558847609, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9970351732826155, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9971954932644669, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.997262489712356, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9974310596926922, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9975665959125188, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9977944766563412, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9979093759810846, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9987144565583501, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9989093532050518, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9989798869845752, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9994016610343304, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9996576708355477, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.100.mlp": [ - { - "accuracy": 0.9719527226529623, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9725862578733971, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9763899794044463, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9774462378730899, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9862856300743786, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9872186455483499, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9888106588960478, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9933033065478268, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9938457141458792, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9933479504022551, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.994093903928603, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9965038671967035, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9969502786836145, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9981264484016911, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9981901153101047, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9984648392174246, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.999443773259899, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.101.self_attn": [ - { - "accuracy": 0.9917354291108879, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9921364913890628, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9934271384079597, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9948591919934475, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9960397550542104, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9960712731056112, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9973382456595764, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9973635831909058, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9975044605885878, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9974780849695793, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9978911908233146, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.998021778705726, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9980986573071661, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9983179944653162, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9989025638512287, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9990687127941974, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.999054534817237, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9994565587732819, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9996520965528601, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.101.mlp": [ - { - "accuracy": 0.9718760257880938, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9725037313213474, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9763234718457648, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9773838649945039, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9862413069625434, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9871741294811823, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9887621875264143, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9932947317383399, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9938378996264777, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9933452927262375, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9940842004541895, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9965022437789134, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9969448302137224, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.998128377011438, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9981897421315998, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9984647072667844, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.999441338788816, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.102.self_attn": [ - { - "accuracy": 0.993049476663337, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9932802431775551, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.994244459158692, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9953563358193558, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9967697830870748, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9968124185688794, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9979815949489804, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9980149805656096, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9982294896617532, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9981252726498305, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9983760157514265, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9984776129958367, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9986114941635414, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9987112459861428, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.999137228121981, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9992435789593545, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9993021542812992, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.999547401372662, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9997041381000983, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.102.mlp": [ - { - "accuracy": 0.9716338259413054, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9722728915512562, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9760763872120726, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9771347723313069, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9861010344404924, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9870576735242809, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9886466483095366, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9932332597182769, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9937787620889905, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9932769882051569, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9940323479669658, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9964684741816631, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9969209944706803, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9981074236259845, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9981701871147379, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9984367823038299, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9994297538436704, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.103.self_attn": [ - { - "accuracy": 0.9902170392635622, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9910102309659123, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9934064965744159, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9944832852159283, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9956235096201693, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9956107891017669, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9974152022286465, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9974195159922697, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9976126218373936, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9977144834426183, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9978130402374327, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.99790402952165, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9983457769083447, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9984666071219468, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9990272050031068, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9991777616148618, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.999180278275162, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9995642639788505, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9997028655986804, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.103.mlp": [ - { - "accuracy": 0.9714234912473905, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9720750649116541, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9758835656470374, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9769395145734674, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9860232501829925, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9869623861373648, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9885515079245364, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9931835922363558, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9937261000251103, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9932260615751147, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9939766073182813, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9964408722191461, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.996889396221377, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9980950889876112, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9981602371523255, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.998435128507768, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.999443012645076, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.104.self_attn": [ - { - "accuracy": 0.9930498262279128, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9933672113785226, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9943905830334284, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9955153322754133, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9966522683879655, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9966550013050437, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9977891772164424, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9978343096782306, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9979484225929666, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9981122677594325, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9982251397726175, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.998338234427042, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9984260348198739, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9985335440938606, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9990625118463006, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9991709235534807, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9992115052208599, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.999572495237532, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9997102373010037, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.104.mlp": [ - { - "accuracy": 0.9711823010523069, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.971839279132454, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9756464720458576, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9766962518425364, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9858941556395668, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9868466917082275, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9884208232948655, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9931001759281284, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.993651110923996, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9931446796185092, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9939042911374647, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9963807071142486, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9968284351542887, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9980227800245446, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9979678733017001, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.998213786251941, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.999005420884044, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.105.self_attn": [ - { - "accuracy": 0.9937758211928763, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9940452969780094, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9958397803313442, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9967243859642431, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9970435255427698, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.99709079991781, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9983526396896004, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9983693647847854, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.998508197929416, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9985448756671854, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9985624737915044, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9986392177187985, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9989897264045124, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.999067399491507, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9994185738400311, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9994775786364795, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9995160676358195, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9996879310324163, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9997911956023401, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.105.mlp": [ - { - "accuracy": 0.9708536041802481, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9715183573333841, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9753550924850922, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9764232398255875, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9857265055669766, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9867041806298259, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9883087775424907, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9930523313876045, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9936098791046166, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9930958806561601, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9938674246230604, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9963759968400394, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9968373808262282, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.998059817816523, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9981214967066128, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9983908770495633, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9994176318440097, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.106.self_attn": [ - { - "accuracy": 0.9947423887757683, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9950173219273749, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9960000711848567, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9969352004240806, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.997516082440454, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9975348047201374, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9984008520106343, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9984044415484133, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.99856267741722, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9986314582975434, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9986807048982499, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9987353917370599, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9988613892652977, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9989286412325639, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9992957844637873, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9993813825222835, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9993910748759701, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9996610406485974, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.99975494783001, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.106.mlp": [ - { - "accuracy": 0.9705155976116657, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.971187644785172, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9750488017146525, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9761223784696899, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9855766588527906, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9865426233185357, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.988155533388061, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9929675047628974, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9935269195488409, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9930235837390157, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9937857139340945, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.996332939346566, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9967914966890883, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9980394312879071, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9981060988441306, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9983846514491903, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9994318892851496, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.107.self_attn": [ - { - "accuracy": 0.9905530700803196, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9906902678899074, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9938275819448265, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9945722324038414, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9953637264031721, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9954469287277836, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9974010111273903, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9974959465969158, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9977066704917601, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.997800443450136, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9977142360232967, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.997851897796959, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.998433743977282, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9986006847864605, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.999132121759957, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.999239615835617, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.999258732858212, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9996237390411185, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9997264650431278, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.107.mlp": [ - { - "accuracy": 0.9701196503286299, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.970791284190981, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9746724026963899, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.975746597653549, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9853664273886305, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.986352905312455, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9879642279916688, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9928433864230388, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9934108433019566, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9929001730082458, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9936769177990132, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9962492873647103, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9967111397693914, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9979501455948737, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.998013483826071, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9982815587366196, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9992340013881116, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.108.self_attn": [ - { - "accuracy": 0.9925635545269439, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9929228389978801, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.993951700382719, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9951997116502178, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9964630299149767, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9965245128658257, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9976219452647982, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9977114596894305, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9978209474785743, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9979554130266862, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9981386151630431, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9982491793956509, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9983710572307342, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9985000225960424, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.999037556420693, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9991662218909416, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9991823773291942, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.999496095986902, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9996881334556843, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.108.mlp": [ - { - "accuracy": 0.9745076714377654, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9750887092300936, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9785028692628992, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9794432841437427, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9875298056653455, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.988370757844103, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9897803462012426, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9939054075236383, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9943976478994285, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9939539671360859, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9946205497008601, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9968243858924037, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.99722318704191, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9982975274385688, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9983474714007523, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9985908054544492, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9994590809020385, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.109.self_attn": [ - { - "accuracy": 0.9941180190071464, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9943112289827121, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.995159650821925, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9960937783930843, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9972378676267046, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9972680507374829, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9983344564128569, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9983723764369068, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.998297714873364, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9985495569478524, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9986055363582349, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.998708683786007, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9988025565766484, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9989102369045691, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9992562336724644, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9993504663947724, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9993965290103868, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9996033302342863, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9997292700886886, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.109.mlp": [ - { - "accuracy": 0.9744381636479184, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9750179231755043, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9783992869779468, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9793316204787085, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9874854910579559, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9883325867796022, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9897341424060103, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9938847693173509, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9943715237375153, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9939262913662548, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.994599582039212, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9968119149684513, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9972137751020981, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9982890238170512, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9983447650508759, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9985844726079928, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9994724989596043, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.110.self_attn": [ - { - "accuracy": 0.9937804497435296, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9941164477434206, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9950590095188665, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9961042816897756, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9969669348976918, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9970204139030293, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.998017006075794, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9981048828003144, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9982279954036992, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9982981347466672, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9984404747496898, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.998545089393462, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9986688011650633, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9987629169880069, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9991989838461833, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9993350493620566, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9993294881975376, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9996184782511065, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9997364121487695, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.110.mlp": [ - { - "accuracy": 0.9743825954041982, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9749765422097162, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9783477743989543, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9792717432015037, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9874656862873388, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9883085672351483, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9896997029549981, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9938526620818792, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9943385594401901, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9938974795054252, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9945650305476432, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9968027823702678, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9972029473559049, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9982874753921734, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9983464261351497, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9985876988411244, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9994873240341947, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.111.self_attn": [ - { - "accuracy": 0.9943362491965098, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9946108028096589, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9955257944591147, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9964638736564666, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9972520145207742, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9972287288401276, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9982016882185187, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9981683453753296, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9982738483415329, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9983712308795044, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9984719563324592, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9985888151170098, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9986596856339786, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9987608374151598, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9991708315087875, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9992858258927683, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.999278069801604, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9996316468903771, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9997285051856459, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.111.mlp": [ - { - "accuracy": 0.9742577511719183, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9748561563166348, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9782094125096735, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9791205960669016, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9873899341511884, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9882445401806188, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9896165218302294, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9938103231650434, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9942956918017253, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9938474145448325, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9945222955736283, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9967540092337387, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9971500400294501, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9982143703750089, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9981810992681667, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9983962265235421, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9991043767934714, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.112.self_attn": [ - { - "accuracy": 0.9942778011440838, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9945278017487573, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9963189536877173, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9969952935720549, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9972668427998495, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9973849260591363, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9984998993796149, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9985750692921054, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.99864179667355, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9986696934670602, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9986861759465874, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9987493920856841, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.999101457118645, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9991797580978049, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9994673804597513, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9995301517866257, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9995410234213954, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9997161312494427, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.999803296995723, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.112.mlp": [ - { - "accuracy": 0.9741343662730957, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9747318692113224, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9780962206423283, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.979022995372744, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9873301665984878, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9881901039104712, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9895789126718515, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9938091808856514, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9943006195887727, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9938534126175862, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9945312662564806, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9967744425858224, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9971801577760887, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9982707428739799, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9983279551685739, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.99856570940526, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9994769720115552, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.113.self_attn": [ - { - "accuracy": 0.9953086233183154, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9957237150152459, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9965692304601697, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9973024230842528, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9978215459819981, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9978347705481084, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9985935675029299, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.998598374460064, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9987525641856911, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9988071141078284, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9988440823324612, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.998912920342072, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9990001444055355, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9990656289033053, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9993753261744707, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9994483946571372, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9994498584787116, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9996876886664388, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9997713692895271, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.113.mlp": [ - { - "accuracy": 0.9739059908315539, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9745161377481724, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9778923290830693, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9788187365292719, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9872330485990173, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9880817572243119, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9894735399682663, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9937587015968012, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9942486483824292, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9938088112241147, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9944796967442686, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9967506102984771, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.997153282747604, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.998260898356277, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9983201411424057, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9985607113278667, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.999486234068172, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.114.self_attn": [ - { - "accuracy": 0.9947585513521182, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9951168516601779, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9958183890159585, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9965195448113311, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9974095211241787, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9974497260746399, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9984099464368468, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9984445295750016, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9985271884361282, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.998645556085792, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9986636376339256, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9987541512053116, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9988860659711798, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9989512662868947, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9993221040073733, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9994194091337513, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9994364472161243, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9996859323896627, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9997708203491608, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.114.mlp": [ - { - "accuracy": 0.973607102752124, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9742294829828959, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9775965032902988, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9785193778378399, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9870669482471911, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9879400310547728, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.989325307334136, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9936651732944148, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9941617529104022, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9937104948208129, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9943965453973138, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9966843609060896, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.997094399228349, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.99819510350445, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9982399586955771, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9984705644432994, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9993225019210704, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.115.self_attn": [ - { - "accuracy": 0.9947470522632724, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9948853110208323, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.996076100027973, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9968063444830477, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9974202872008869, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9974368568754902, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9982601299171189, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9982437532346108, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9985666002980188, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9986620442757971, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9987394666011249, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9988359482162387, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9989721849744552, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9990464368387811, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9993724076974966, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9994534797779603, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9994683887006862, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9996747047721568, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9997469941023921, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.115.mlp": [ - { - "accuracy": 0.973152583612031, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9737888689790117, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.977188080745308, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9781207759913645, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9868448969515923, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9877352730224007, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9891373449691424, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9935632073688075, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9940738177515174, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9936102777719498, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9943145336705799, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9966427218107703, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9970632468190926, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.998193988656184, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.998261635537921, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9985010752907807, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9994579500926193, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.116.self_attn": [ - { - "accuracy": 0.9924893642922765, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9932451860513538, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9945029834481446, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9957256753022146, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9966231664875522, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9966595607233772, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9979213242676404, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9979508126791763, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9980946314018709, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9981875462016385, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9982871699551317, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9983915721011152, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9985631547996922, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.998654290090168, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9991549432528891, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9992613118972115, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9992987931105554, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.999569872164437, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9997320639882482, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.116.mlp": [ - { - "accuracy": 0.9728820474916383, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9735190134593531, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9769167012387985, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9778505517660003, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9867192928826338, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9876144156840286, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9890146070944243, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9934900713422777, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.994004303675243, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9935427647034981, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9942509139103717, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9966117791022713, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.997035325137212, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9981837891667199, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9982467527305218, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9984878579914374, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9994587567405113, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.117.self_attn": [ - { - "accuracy": 0.9952558605490547, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9958725894041556, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.99675552587417, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9974882826454153, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9978058383873615, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9978393156902472, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9986901895363668, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9987450114782214, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9987818807283858, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9988160172697941, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9988642120672586, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9989237791834105, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9990700142929869, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9991408941936434, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9994265313802507, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9995060644593506, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9995060667080045, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9997284055831475, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9997903284046946, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.117.mlp": [ - { - "accuracy": 0.972525862339688, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9731775319791938, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9766164911224654, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9775686789500085, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9865349811550818, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9874496899153057, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9888718055659219, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9934191391803324, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9939375425027194, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9934622813383803, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9941861782664139, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9965684400797871, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9970000592109404, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9981633316431391, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.998226390324386, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9984702309654829, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9994633294378505, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.118.self_attn": [ - { - "accuracy": 0.9932420562928248, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9937731364839956, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9948481763713062, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9961056764617464, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9966928760864233, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9967268356305891, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9980088785217193, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9980240030532801, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9982167528819685, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9983556747718371, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9984625114777469, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9985562613001093, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9987373911242589, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9988378493029526, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9992797417707104, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9993641233702778, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9994402079181256, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9996006427934748, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9997602463998575, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.118.mlp": [ - { - "accuracy": 0.9719507954034367, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9726186835844266, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9761067993056617, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9770718132096686, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9862328916788101, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9871898687591678, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9886287689258001, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9932557448842808, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.993801498185157, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9933016150822177, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9940586609422768, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9964837512846938, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9969332553017395, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9981086773209666, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9981724868932935, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9984168206268039, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9994121644030154, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.119.self_attn": [ - { - "accuracy": 0.9951654054763678, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9954225070112827, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9966361407362121, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9972459735581651, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9976361086101908, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9976572231695938, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.998321240890379, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9983541762738145, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9985038953845536, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.998578823520802, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9987610936269017, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9988543521735425, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9989619572971653, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9990420543196562, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9993534700234273, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9994672140532394, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9994077241158505, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.999689302044939, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9997463033819498, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.119.mlp": [ - { - "accuracy": 0.97104265398689, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9717297021201566, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9752900519555336, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9762782930935684, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9858074164949358, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9867722047737947, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.988237506415891, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9930398419352346, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9935957455860549, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9931086622497165, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.993867013860788, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9963831232836176, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9968356418876761, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9980572826032968, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9981320676712417, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9983856890375089, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9994311081184271, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.120.self_attn": [ - { - "accuracy": 0.9914937273746258, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.991813424876646, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9928679050338504, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9941030989866704, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9959470849288138, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9959671858722638, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.997513774420919, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9975294146601013, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9976999143163037, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9976929979656186, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9978328499526373, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9979946685673081, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9980686120688915, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9982791908551008, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9989128488324288, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9990727306729624, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9991290696994647, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9995180418650547, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.99969466937182, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.120.mlp": [ - { - "accuracy": 0.9703317818869102, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.971044797499321, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9746256795662799, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9756284084073023, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9854524958094484, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9864546687489277, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.987931813897663, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9928581838888165, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9934323667671139, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.992924072179257, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9937134434003383, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.996281513331556, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9967568456011481, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9980028034861836, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9980777333380262, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9983323274125149, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9994138980079075, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.121.self_attn": [ - { - "accuracy": 0.9884362208019746, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9887029855561099, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9906698089387072, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9922208832331786, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9944911919240105, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9945521926825964, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9966446913558206, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9966792392691499, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.996805185770714, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9970147729189577, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9973112653119882, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9975051854393984, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9977629545058957, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.997926120199018, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9986478579169336, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.99879410279927, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9989158344534715, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9993955528296187, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9995926806969739, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.121.mlp": [ - { - "accuracy": 0.969566213457208, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9703005735615367, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9739530318367638, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9749712697965535, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9850859062648133, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9861011216416955, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9875998021789679, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9926611406385506, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9932418783734503, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9927442319151994, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.993538096657415, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9961664711835941, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9966367817373554, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9978892007462778, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9980348596337104, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9982923663460257, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9993875419742452, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.122.self_attn": [ - { - "accuracy": 0.9926015441918647, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9931840454718392, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.994389387840209, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9955948033541637, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9965367362111512, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9966123377794007, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9978239578623814, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.997892721353932, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9980073635968821, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9981170069475315, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9982671354113049, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9983681283070167, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9985518501611965, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9986475291263656, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9991505815138991, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.999263819536856, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9992973237929522, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9995658891681447, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9997381839639274, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.122.mlp": [ - { - "accuracy": 0.9739901925878305, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.974616008810699, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9778230773579133, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9787142607138345, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9872500572451636, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9881167798793238, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9894470206804966, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9937802166923096, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9942720694295866, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9938268717378378, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9945106054411122, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9967485608986432, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9971571102628092, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9982517120354858, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9983129992462253, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9985394010620535, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9994761349118658, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.123.self_attn": [ - { - "accuracy": 0.9963672585956949, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9965622230972114, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9972177199884563, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9979234538347411, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9982603863739458, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9982715999388969, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9988831448074627, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9989132638254791, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9989751833552298, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9990043565967904, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9990519719492448, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9991029852658118, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9991887904991592, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9992343518901371, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9994819875206112, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9995394712451869, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9995516136098702, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9997436154103765, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9998025021777721, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.123.mlp": [ - { - "accuracy": 0.9736479699219528, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.974275733099172, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9775240316889003, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9784278295149929, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9870652142225912, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9879581008449589, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9893053739919866, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9936953585496858, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9941958394636842, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9937423069250623, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9944407781708593, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.996704186551812, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9971229704280727, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9982307827847666, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9982923904164253, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9985207223860351, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9994778883931423, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.124.self_attn": [ - { - "accuracy": 0.9931612514866222, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9936879487757228, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.994973536580801, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9961200007360036, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9967722164920384, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9968565288918877, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9980859720258435, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9981308121381229, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9981714813651419, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9983094501566436, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9984435758700496, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9985612912477289, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9987658877078885, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9988455673385608, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9992895067047548, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9993692867497693, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9994317190813538, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9996005638363146, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9997567678510677, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.124.mlp": [ - { - "accuracy": 0.9732735730511578, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9739171467525395, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.977171752454811, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9780836507285896, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9868672276650997, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9877927240572477, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9891464874687556, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9935977072454989, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9941172118568303, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9936393740905547, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9943717618129755, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9966491536899028, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.997083800134102, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9981906930732197, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.998259398059944, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9984836198671377, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9994432133741946, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.125.self_attn": [ - { - "accuracy": 0.9953127465406922, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9955531654291248, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9968883948045244, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9974060030102632, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.997729704237396, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9976753312420394, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9984318828761676, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.998293997272604, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9985770536831727, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9986494171653727, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9988322403055223, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9989007755740251, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9990043795551173, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9990969372876478, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9993591051560974, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9994840632349032, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9994010185504234, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9997013721578313, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.999753784658663, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.125.mlp": [ - { - "accuracy": 0.9724368130984274, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9731052939927107, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.976426040998807, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9773559947743228, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.986480463345192, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9874019192188586, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9887781606889084, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9933919098938963, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9939225586008673, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9934608347248286, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9941865773136286, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9965542349596753, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9969877402886356, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9981429032356429, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9982156716838577, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.998450895109655, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9994494026223516, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.126.self_attn": [ - { - "accuracy": 0.9925919346707431, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9928319154944467, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9939410755239231, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9950087180263117, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9963557428288224, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9964100531375918, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.997730534499217, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.997793549714659, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9978982186263525, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9979918700042426, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.998105365574654, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9982069449278673, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.998338930167895, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9984492256894315, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9990378956212417, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9991510562433273, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9992195999764494, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9995487697244818, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9996913248048115, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.126.mlp": [ - { - "accuracy": 0.9717253930866718, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9724178686247844, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9757367933757211, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9766744044854453, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9861397554019564, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9870888101133076, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9884666931864462, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9932098256203493, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9937572275582505, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9932902069076112, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9940373688372538, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9964639654407572, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9969128460108646, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9980926796867463, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9981703892852621, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9984071391518228, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.999435920046189, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.127.self_attn": [ - { - "accuracy": 0.9892488171248451, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9895101833451343, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9914081477266001, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9928775639763396, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9948491449222753, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9948937672827589, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9967875248188839, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9968406247636793, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9970220451880443, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9971346502118793, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9974317884035898, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9975384438569707, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.997790271894222, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9979815175082829, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9986333946366549, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9988593549780106, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9988605788032721, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9994229259134858, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9995955667093911, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.127.mlp": [ - { - "accuracy": 0.971065524377321, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9717722020455097, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9751195831126288, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9760688539888513, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9858131704263782, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9867832125654739, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9881707250483727, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9930445381048086, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9935969026930827, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.993127695542123, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9938879089895636, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9963602886510718, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9968110122105205, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9979960830073411, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9981299426565927, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9983629083941005, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9994092779969307, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.128.self_attn": [ - { - "accuracy": 0.9913047152620397, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9916926147661319, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.993178474704588, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9945567263853982, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9958105496694579, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9959395110827723, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9973937976357871, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9975649451432553, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9977225065292594, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.997838550602625, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9979861818413299, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9981393329737904, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.998329769519746, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9984696808785788, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9989840197346271, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9990793574987421, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9991891661714027, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9995101159392538, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9996528292540461, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.128.mlp": [ - { - "accuracy": 0.9700837602936908, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9708349909633398, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.974253333663862, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9752415848503772, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.985319400647361, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9863358930390524, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9877605779880756, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9928120274930016, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9933894374652913, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.992908388278202, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.993706468774594, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9962657987703815, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9967420248461789, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9979825198196953, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9980669018726698, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9983074224682672, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.999401896635965, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.129.self_attn": [ - { - "accuracy": 0.9906053686220395, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9910417840767064, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9927442697808146, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9939715704071874, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9954579589202216, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9955251529126575, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.99710694684549, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9972001509373322, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.997535437657988, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9976446505844299, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9977657166983638, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9979663342114931, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9981818250686812, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9983349551667312, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9989318838036668, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9990788766023654, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9991667036696239, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9994544133645066, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9996656441852745, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.129.mlp": [ - { - "accuracy": 0.9694223333346216, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9701834716098873, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9736576165611807, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.974667511341211, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9850237780299625, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9860340131838855, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9874754229707545, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9926565294889244, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9932426913322783, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9927734313777795, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9935689940190825, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.996197075497261, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.996673136189776, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9979475465737969, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9980341655471804, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9982770048847766, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9993981589719433, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.130.self_attn": [ - { - "accuracy": 0.9896191904615415, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9900664406779566, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9916637759704731, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9932043553624106, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9951025829650462, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9951498339099711, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9971351041213462, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9971915604669208, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9973460915804792, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9974841095788992, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9976053362356891, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9977080287889725, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9979452430393154, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9981034219534578, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.998824103018514, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9989680659491569, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.999116462156005, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9994232939145724, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9996752935617942, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.130.mlp": [ - { - "accuracy": 0.9683212011464333, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9691125156455919, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9726921449366369, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9737396903434082, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9844646089661279, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9855408377847389, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9870219292903417, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9923722558703861, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9929963069428739, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9924939272091969, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9933407511684651, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9960512492658669, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9965566130551068, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9978670470392037, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9979613533329317, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.998211588434826, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9993809824487758, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.131.self_attn": [ - { - "accuracy": 0.9901829589599449, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9905074841195816, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9919693129333226, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.993553753635895, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9951885770638719, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9951691247253237, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9970772819912159, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9971294305042216, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9969770615111644, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9974305563986203, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9976024638040384, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9976995128795112, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.997906763431322, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9980865266897079, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9987608811566852, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.998953367727441, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9990485904487095, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9994450536170533, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9996549830922097, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.131.mlp": [ - { - "accuracy": 0.9666636469528863, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.967507961451223, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9711805139913371, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9722612855073652, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9836108937467399, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9847864812534106, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9862939346777765, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9919209910409623, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9925984452527604, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9920520857711764, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9929740785277987, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9958039333333114, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9963579080067575, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9977142527503403, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9978252401955328, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9980813877742836, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9993176839887287, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.132.self_attn": [ - { - "accuracy": 0.9871335744759754, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.987930497228119, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9895856427028775, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9915615354762658, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.993943700873244, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9939666598250991, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9963852970885407, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9964573077342816, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9967860475207042, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9970133320036295, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9970595174542579, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9972181083744784, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9974659897301248, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9976821110743147, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9985643278586825, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9987726443612605, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9989748250870769, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9993352013836173, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9996319898958685, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.132.mlp": [ - { - "accuracy": 0.9657573847002104, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9666243938631133, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9703975357115269, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9715226679844292, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9831904631696249, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9843596212173763, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9859235357787264, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9916907468761661, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9923727000377288, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9918515936128403, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9927681004628539, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9957091220973158, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9962615826240692, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9976740573826981, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9977854389182635, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9980529084875199, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9993255054409689, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.133.self_attn": [ - { - "accuracy": 0.9866277778423146, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9871690612237313, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9888264257215748, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9911815941480822, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9934283180995599, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9935462941954795, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9960503429419508, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9962316113769224, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9966025558104249, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9966798256592531, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9967965167383418, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9969483712124393, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9972315068344438, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9974645986956986, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9983593145397639, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.998675331225202, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9987625390793639, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9992867874113328, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9996072702388598, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.133.mlp": [ - { - "accuracy": 0.9636692473370778, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9645914286375046, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9685720017081813, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9697649495578126, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9821634058301386, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9834242379292846, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9850320133233541, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9910803839907443, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9918199552860307, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.991283649554182, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9922636122638849, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9953512650742931, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9959358469043907, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9973721857501292, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9974206762976552, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9976878990112853, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9987483056703288, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.134.self_attn": [ - { - "accuracy": 0.976930464617908, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9777815115981197, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9808747214510253, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9848334873585325, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9885540409748884, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9886119070855018, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9930798349117762, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9931307671052453, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.994264767578754, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9946678879082596, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9945570419812085, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9949061031281752, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9954080852361298, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9958969441879737, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9975001476827616, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9978445855790357, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9982879883732254, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9988029308886709, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9994455062903733, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.134.mlp": [ - { - "accuracy": 0.9614592327883369, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9624291721143221, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9666369667178706, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9679388674466234, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9810007466493469, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9823713387411676, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9840996314428354, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9903996944623558, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9912376498901531, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.990703809148583, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9917427158767456, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.995040732729984, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9956637699843237, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9971857607903841, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9973522076493522, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9976616243366152, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9988770430576742, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.135.self_attn": [ - { - "accuracy": 0.9832704371625656, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9840587948105837, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9865548892534877, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9892642687712061, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9919720747704176, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9920874950230906, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9952710378228834, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9953763426747173, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9955842163854915, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.996047977370357, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9959612876225851, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9962295867306622, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9966124812459671, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.997034998022412, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9980806149787417, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9984448628965765, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9985684943446693, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9991462181542853, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9995641792698543, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.135.mlp": [ - { - "accuracy": 0.9597117532240718, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9607379612954039, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9651618152856827, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9665313481696343, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.98013178685582, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9817122839587299, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9833856956837209, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9898159344876675, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.990775454985468, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9902051268390527, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.991355270269866, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9947241859657592, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9953943904072634, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9968885762469941, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9969138086797964, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9972135256363177, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9982103829112786, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.136.self_attn": [ - { - "accuracy": 0.9829979189426491, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.984050323500445, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9868446214516696, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9897253522649407, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9920162416161283, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9921743215895013, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9957516639444389, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9958078320579309, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9961016977965635, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9964027799126741, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9962420752394552, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9964435981390508, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9969290074353155, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9972106361835215, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9982875449901545, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9985476928260667, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9988154256624464, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9992086969064794, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9995901350170913, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.136.mlp": [ - { - "accuracy": 0.9573973286896944, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9586446079376497, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9628069945856145, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9640985060679286, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9788982197642326, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9806305521510934, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9822511687678727, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.98915416124816, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9900010156592256, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.989365227177347, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9906524556786999, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9942225045034367, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9950042669416258, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.996585732901861, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9967824953216079, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9970728386879751, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9983114253080106, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.137.self_attn": [ - { - "accuracy": 0.9899233150982151, - "total_bits": 320757760, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9904457696370388, - "total_bits": 329080832, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9919186557005895, - "total_bits": 336024576, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.99337605426186, - "total_bits": 401557504, - "q_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9952246866458537, - "total_bits": 475279360, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.995264904132407, - "total_bits": 475479040, - "q_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64, - "3": 64 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9973720887271491, - "total_bits": 609759232, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9974196618055239, - "total_bits": 610024448, - "q_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9976022818591446, - "total_bits": 615020544, - "q_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 64 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9977301918705436, - "total_bits": 623951872, - "q_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9976517631500763, - "total_bits": 626473984, - "q_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9977858738091431, - "total_bits": 630355968, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.998072705433802, - "total_bits": 637362176, - "q_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 64 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 64, - "4": 64 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9982311413500851, - "total_bits": 646823936, - "q_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "5": 32 - }, - "bits": [ - 5 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.998914591790373, - "total_bits": 784740352, - "q_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9990682350042718, - "total_bits": 797818880, - "q_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9992522790929989, - "total_bits": 911749120, - "q_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.999458188541834, - "total_bits": 942718976, - "q_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 32 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "6": 32 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9997152320260954, - "total_bits": 1213739008, - "q_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "k_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "v_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "o_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.layers.137.mlp": [ - { - "accuracy": 0.9649464951730088, - "total_bits": 1575618640, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9659683380864168, - "total_bits": 1632503888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "6": 32, - "3": 64, - "2": 64 - }, - "bits": [ - 6, - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.2, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9692259756358046, - "total_bits": 1823397888, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "3": 32 - }, - "bits": [ - 5, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9701831474115974, - "total_bits": 2046482432, - "gate_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "3": 64, - "2": 64 - }, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.3, - 0.7 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9832211880895653, - "total_bits": 2306033760, - "gate_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 128, - "3": 128 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128, - "3": 128 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9846236185709897, - "total_bits": 2368669696, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32, - "3": 32 - }, - "bits": [ - 8, - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9857400215083831, - "total_bits": 2547224672, - "gate_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32, - "3": 32 - }, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9913087746765661, - "total_bits": 2912382048, - "gate_proj": { - "group_size": { - "4": 128 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 128 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9920902707074818, - "total_bits": 2955347968, - "gate_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "4": 32 - }, - "bits": [ - 4 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "4": 32 - }, - "bits": [ - 8, - 4 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9916673996240685, - "total_bits": 2998880352, - "gate_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 128, - "4": 128 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 128, - "4": 128 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9926953285589422, - "total_bits": 3061516288, - "gate_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "5": 32, - "4": 32 - }, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "5": 32, - "4": 32 - }, - "bits": [ - 8, - 5, - 4 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9955397811105573, - "total_bits": 3691726944, - "gate_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128, - "5": 128 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128, - "5": 128 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.996146851782932, - "total_bits": 3754362880, - "gate_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 32, - "5": 32 - }, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 32, - "5": 32 - }, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.1, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9973594358705572, - "total_bits": 4276045920, - "gate_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "6": 128 - }, - "bits": [ - 6 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 32, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9975996022296482, - "total_bits": 4425400832, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.15, - 0.85 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.9978549610088138, - "total_bits": 4823859712, - "gate_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128, - "6": 128 - }, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - }, - { - "accuracy": 0.998898987119135, - "total_bits": 5660623360, - "gate_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "up_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - }, - "down_proj": { - "group_size": { - "8": 128 - }, - "bits": [ - 8 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4 - } - } - ], - "model.norm.norm": null, - "lm_head.linear": null - }, - "last_module_idx": 278 -} \ No newline at end of file