|
{ |
|
"measurement": { |
|
"model.layers.0": { |
|
"accuracy": 0.7789640426635742, |
|
"total_bits": 174452896, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.15, |
|
0.85 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.15, |
|
0.85 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.1": { |
|
"accuracy": 0.8732409477233887, |
|
"total_bits": 169865376, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.2": { |
|
"accuracy": 0.8786478042602539, |
|
"total_bits": 169865376, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.3": { |
|
"accuracy": 0.9009003639221191, |
|
"total_bits": 168554656, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.4": { |
|
"accuracy": 0.8903632164001465, |
|
"total_bits": 169865376, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.5": { |
|
"accuracy": 0.8945960998535156, |
|
"total_bits": 169865376, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.6": { |
|
"accuracy": 0.886786937713623, |
|
"total_bits": 169865376, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.7": { |
|
"accuracy": 0.8697657585144043, |
|
"total_bits": 169865376, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.8": { |
|
"accuracy": 0.8596367835998535, |
|
"total_bits": 169865376, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.9": { |
|
"accuracy": 0.8555536270141602, |
|
"total_bits": 169865376, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.10": { |
|
"accuracy": 0.8454461097717285, |
|
"total_bits": 174452896, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.15, |
|
0.85 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.15, |
|
0.85 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.11": { |
|
"accuracy": 0.8947896957397461, |
|
"total_bits": 169210016, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.12": { |
|
"accuracy": 0.9072887897491455, |
|
"total_bits": 169865376, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.13": { |
|
"accuracy": 0.9025442600250244, |
|
"total_bits": 169865376, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.14": { |
|
"accuracy": 0.9012281894683838, |
|
"total_bits": 169865376, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.15": { |
|
"accuracy": 0.8996968269348145, |
|
"total_bits": 169865376, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.16": { |
|
"accuracy": 0.8950610160827637, |
|
"total_bits": 169865376, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.17": { |
|
"accuracy": 0.893974781036377, |
|
"total_bits": 169865376, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.18": { |
|
"accuracy": 0.8896980285644531, |
|
"total_bits": 169865376, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.19": { |
|
"accuracy": 0.8897199630737305, |
|
"total_bits": 169865376, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.20": { |
|
"accuracy": 0.8838353157043457, |
|
"total_bits": 169865376, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.21": { |
|
"accuracy": 0.8887925148010254, |
|
"total_bits": 174452896, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.15, |
|
0.85 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.15, |
|
0.85 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.22": { |
|
"accuracy": 0.8880710601806641, |
|
"total_bits": 174452896, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.15, |
|
0.85 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.15, |
|
0.85 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.23": { |
|
"accuracy": 0.8821191787719727, |
|
"total_bits": 174452896, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.15, |
|
0.85 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.15, |
|
0.85 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.24": { |
|
"accuracy": 0.8900241851806641, |
|
"total_bits": 174452896, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.15, |
|
0.85 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.15, |
|
0.85 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.25": { |
|
"accuracy": 0.9082069396972656, |
|
"total_bits": 168554656, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.26": { |
|
"accuracy": 0.9334874153137207, |
|
"total_bits": 169865376, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.27": { |
|
"accuracy": 0.9333341121673584, |
|
"total_bits": 169865376, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.28": { |
|
"accuracy": 0.9326183795928955, |
|
"total_bits": 169865376, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.29": { |
|
"accuracy": 0.9348857402801514, |
|
"total_bits": 172486816, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.15, |
|
0.85 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.30": { |
|
"accuracy": 0.9331240653991699, |
|
"total_bits": 172486816, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.15, |
|
0.85 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.31": { |
|
"accuracy": 0.933929443359375, |
|
"total_bits": 174452896, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.15, |
|
0.85 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.15, |
|
0.85 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.32": { |
|
"accuracy": 0.9334988594055176, |
|
"total_bits": 174452896, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.15, |
|
0.85 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.15, |
|
0.85 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.33": { |
|
"accuracy": 0.9322116374969482, |
|
"total_bits": 174452896, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.15, |
|
0.85 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.15, |
|
0.85 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.34": { |
|
"accuracy": 0.9316282272338867, |
|
"total_bits": 174452896, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.15, |
|
0.85 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.15, |
|
0.85 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.35": { |
|
"accuracy": 0.927081823348999, |
|
"total_bits": 174452896, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.15, |
|
0.85 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.15, |
|
0.85 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.36": { |
|
"accuracy": 0.9283866882324219, |
|
"total_bits": 176418976, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.2, |
|
0.8 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.2, |
|
0.8 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.37": { |
|
"accuracy": 0.9364392757415771, |
|
"total_bits": 186249376, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.4, |
|
0.6 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.4, |
|
0.6 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.38": { |
|
"accuracy": 0.9330251216888428, |
|
"total_bits": 223867040, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.35, |
|
0.65 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.35, |
|
0.65 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.5, |
|
0.5 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.35, |
|
0.65 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.35, |
|
0.65 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.35, |
|
0.65 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.5, |
|
0.5 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.39": { |
|
"accuracy": 0.9361402988433838, |
|
"total_bits": 223867040, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.35, |
|
0.65 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.35, |
|
0.65 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.5, |
|
0.5 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.35, |
|
0.65 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.35, |
|
0.65 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.35, |
|
0.65 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.5, |
|
0.5 |
|
], |
|
"scale_bits": 4 |
|
} |
|
} |
|
} |
|
} |