|
{ |
|
"measurement": { |
|
"model.layers.0": { |
|
"accuracy": 0.9234118461608887, |
|
"total_bits": 685474112, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.15, |
|
0.85 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.15, |
|
0.85 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.1": { |
|
"accuracy": 0.9103550910949707, |
|
"total_bits": 744980800, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.4, |
|
0.6 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.4, |
|
0.6 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.2": { |
|
"accuracy": 0.9337918758392334, |
|
"total_bits": 1058505024, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.5, |
|
0.5 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.5, |
|
0.5 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1.0 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.5, |
|
0.5 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.5, |
|
0.5 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.5, |
|
0.5 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1.0 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.3": { |
|
"accuracy": 0.9201955795288086, |
|
"total_bits": 676823360, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.4": { |
|
"accuracy": 0.9312362670898438, |
|
"total_bits": 665813312, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.5": { |
|
"accuracy": 0.9329456090927124, |
|
"total_bits": 744980800, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.4, |
|
0.6 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.4, |
|
0.6 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.6": { |
|
"accuracy": 0.9207742214202881, |
|
"total_bits": 744980800, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.4, |
|
0.6 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.4, |
|
0.6 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.7": { |
|
"accuracy": 0.9351202249526978, |
|
"total_bits": 882344256, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1.0 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1.0 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.8": { |
|
"accuracy": 0.9291115999221802, |
|
"total_bits": 882344256, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1.0 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1.0 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.9": { |
|
"accuracy": 0.9229476451873779, |
|
"total_bits": 882344256, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1.0 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1.0 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.10": { |
|
"accuracy": 0.9183874130249023, |
|
"total_bits": 882344256, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1.0 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1.0 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.11": { |
|
"accuracy": 0.9176948070526123, |
|
"total_bits": 882344256, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1.0 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1.0 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.12": { |
|
"accuracy": 0.9242410659790039, |
|
"total_bits": 948404544, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.25, |
|
0.75 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.25, |
|
0.75 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1.0 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.25, |
|
0.75 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.25, |
|
0.75 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.25, |
|
0.75 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.13": { |
|
"accuracy": 0.9212267398834229, |
|
"total_bits": 948404544, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.25, |
|
0.75 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.25, |
|
0.75 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1.0 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.25, |
|
0.75 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.25, |
|
0.75 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.25, |
|
0.75 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.14": { |
|
"accuracy": 0.9170515537261963, |
|
"total_bits": 948404544, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.25, |
|
0.75 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.25, |
|
0.75 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1.0 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.25, |
|
0.75 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.25, |
|
0.75 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.25, |
|
0.75 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.15": { |
|
"accuracy": 0.9178385734558105, |
|
"total_bits": 948404544, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.25, |
|
0.75 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.25, |
|
0.75 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1.0 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.25, |
|
0.75 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.25, |
|
0.75 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.25, |
|
0.75 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.16": { |
|
"accuracy": 0.9178411960601807, |
|
"total_bits": 948404544, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.25, |
|
0.75 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.25, |
|
0.75 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1.0 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.25, |
|
0.75 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.25, |
|
0.75 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.25, |
|
0.75 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.17": { |
|
"accuracy": 0.9196867942810059, |
|
"total_bits": 948404544, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.25, |
|
0.75 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.25, |
|
0.75 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1.0 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.25, |
|
0.75 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.25, |
|
0.75 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.25, |
|
0.75 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.18": { |
|
"accuracy": 0.9236702919006348, |
|
"total_bits": 948404544, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.25, |
|
0.75 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.25, |
|
0.75 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1.0 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.25, |
|
0.75 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.25, |
|
0.75 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.25, |
|
0.75 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.19": { |
|
"accuracy": 0.9250991344451904, |
|
"total_bits": 948404544, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.25, |
|
0.75 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.25, |
|
0.75 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1.0 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.25, |
|
0.75 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.25, |
|
0.75 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.25, |
|
0.75 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.20": { |
|
"accuracy": 0.9187812805175781, |
|
"total_bits": 882344256, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1.0 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1.0 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.21": { |
|
"accuracy": 0.9207375049591064, |
|
"total_bits": 882344256, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1.0 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1.0 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.22": { |
|
"accuracy": 0.9287240505218506, |
|
"total_bits": 882344256, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1.0 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1.0 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.23": { |
|
"accuracy": 0.9161067008972168, |
|
"total_bits": 744980800, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.4, |
|
0.6 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.4, |
|
0.6 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.24": { |
|
"accuracy": 0.9204282760620117, |
|
"total_bits": 744980800, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.4, |
|
0.6 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.4, |
|
0.6 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.25": { |
|
"accuracy": 0.9228081703186035, |
|
"total_bits": 744980800, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.4, |
|
0.6 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.4, |
|
0.6 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.26": { |
|
"accuracy": 0.92047119140625, |
|
"total_bits": 694649152, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.2, |
|
0.8 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.2, |
|
0.8 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.27": { |
|
"accuracy": 0.9243440628051758, |
|
"total_bits": 694649152, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.2, |
|
0.8 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.2, |
|
0.8 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.28": { |
|
"accuracy": 0.9227378368377686, |
|
"total_bits": 676823360, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.29": { |
|
"accuracy": 0.9246113300323486, |
|
"total_bits": 676823360, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.30": { |
|
"accuracy": 0.9254612922668457, |
|
"total_bits": 676823360, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.31": { |
|
"accuracy": 0.927114725112915, |
|
"total_bits": 676823360, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.32": { |
|
"accuracy": 0.9271104335784912, |
|
"total_bits": 676823360, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.33": { |
|
"accuracy": 0.9283251762390137, |
|
"total_bits": 676823360, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.34": { |
|
"accuracy": 0.9261393547058105, |
|
"total_bits": 676823360, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.35": { |
|
"accuracy": 0.925363302230835, |
|
"total_bits": 676823360, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.36": { |
|
"accuracy": 0.922121524810791, |
|
"total_bits": 673677632, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.09, |
|
0.91 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.09, |
|
0.91 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.37": { |
|
"accuracy": 0.9217836856842041, |
|
"total_bits": 676823360, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.38": { |
|
"accuracy": 0.9224433898925781, |
|
"total_bits": 673677632, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.09, |
|
0.91 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.09, |
|
0.91 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.39": { |
|
"accuracy": 0.9265820980072021, |
|
"total_bits": 676823360, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
} |
|
} |
|
} |
|
} |