|
{ |
|
"measurement": { |
|
"model.layers.0": { |
|
"accuracy": 0.8987722396850586, |
|
"total_bits": 507787520, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.2, |
|
0.8 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.2, |
|
0.8 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.1": { |
|
"accuracy": 0.9002890586853027, |
|
"total_bits": 478689536, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.2, |
|
0.8 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.2, |
|
0.8 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.2": { |
|
"accuracy": 0.9608855843544006, |
|
"total_bits": 456407296, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.3": { |
|
"accuracy": 0.9555550813674927, |
|
"total_bits": 456407296, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.4": { |
|
"accuracy": 0.9499313831329346, |
|
"total_bits": 456407296, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.5": { |
|
"accuracy": 0.94085693359375, |
|
"total_bits": 456407296, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.6": { |
|
"accuracy": 0.9383460283279419, |
|
"total_bits": 456407296, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.7": { |
|
"accuracy": 0.9287691116333008, |
|
"total_bits": 456407296, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.8": { |
|
"accuracy": 0.9277379512786865, |
|
"total_bits": 456407296, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.9": { |
|
"accuracy": 0.9248785972595215, |
|
"total_bits": 456407296, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.10": { |
|
"accuracy": 0.9232504367828369, |
|
"total_bits": 456407296, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.11": { |
|
"accuracy": 0.9223508834838867, |
|
"total_bits": 456407296, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.12": { |
|
"accuracy": 0.9154980182647705, |
|
"total_bits": 456407296, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.13": { |
|
"accuracy": 0.9131753444671631, |
|
"total_bits": 456407296, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.14": { |
|
"accuracy": 0.9053680896759033, |
|
"total_bits": 456407296, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.15": { |
|
"accuracy": 0.9041793346405029, |
|
"total_bits": 465844480, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.16": { |
|
"accuracy": 0.9069504737854004, |
|
"total_bits": 478689536, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.2, |
|
0.8 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.2, |
|
0.8 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.17": { |
|
"accuracy": 0.9034838676452637, |
|
"total_bits": 478689536, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.2, |
|
0.8 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.2, |
|
0.8 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.18": { |
|
"accuracy": 0.9072866439819336, |
|
"total_bits": 507787520, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.2, |
|
0.8 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.2, |
|
0.8 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.19": { |
|
"accuracy": 0.9008588790893555, |
|
"total_bits": 478689536, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.2, |
|
0.8 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.2, |
|
0.8 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.20": { |
|
"accuracy": 0.9053101539611816, |
|
"total_bits": 478689536, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.2, |
|
0.8 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.2, |
|
0.8 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.21": { |
|
"accuracy": 0.9090385437011719, |
|
"total_bits": 478689536, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.2, |
|
0.8 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.2, |
|
0.8 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.22": { |
|
"accuracy": 0.9069957733154297, |
|
"total_bits": 465844480, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.01, |
|
0.99 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.23": { |
|
"accuracy": 0.9098095893859863, |
|
"total_bits": 478689536, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.2, |
|
0.8 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.2, |
|
0.8 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.24": { |
|
"accuracy": 0.9100494384765625, |
|
"total_bits": 478689536, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.2, |
|
0.8 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.2, |
|
0.8 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.25": { |
|
"accuracy": 0.9088339805603027, |
|
"total_bits": 478689536, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.2, |
|
0.8 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.2, |
|
0.8 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.26": { |
|
"accuracy": 0.9072427749633789, |
|
"total_bits": 478689536, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.2, |
|
0.8 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.2, |
|
0.8 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.27": { |
|
"accuracy": 0.9096179008483887, |
|
"total_bits": 478689536, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.2, |
|
0.8 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.2, |
|
0.8 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.28": { |
|
"accuracy": 0.9028897285461426, |
|
"total_bits": 478689536, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.2, |
|
0.8 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.02, |
|
0.98 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.2, |
|
0.8 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.29": { |
|
"accuracy": 0.902318000793457, |
|
"total_bits": 513030400, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.4, |
|
0.6 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.05, |
|
0.95 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.4, |
|
0.6 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.30": { |
|
"accuracy": 0.9191799163818359, |
|
"total_bits": 607664384, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1.0 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1.0 |
|
], |
|
"scale_bits": 4 |
|
} |
|
}, |
|
"model.layers.31": { |
|
"accuracy": 0.9125514030456543, |
|
"total_bits": 607664384, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1.0 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128, |
|
"2": 128 |
|
}, |
|
"bits": [ |
|
4, |
|
2 |
|
], |
|
"bits_prop": [ |
|
0.1, |
|
0.9 |
|
], |
|
"scale_bits": 4 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1.0 |
|
], |
|
"scale_bits": 4 |
|
} |
|
} |
|
} |
|
} |