| { |
| "measurement": { |
| "model.layers.0": { |
| "accuracy": 0.9642382374731824, |
| "total_bits": 1516339200.0, |
| "o_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.1": { |
| "accuracy": 0.9603531114989892, |
| "total_bits": 1673707520.0, |
| "o_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.2": { |
| "accuracy": 0.9612206381279975, |
| "total_bits": 1653227520.0, |
| "o_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.3": { |
| "accuracy": 0.958331938716583, |
| "total_bits": 1653227520.0, |
| "o_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.4": { |
| "accuracy": 0.9613574232207611, |
| "total_bits": 1653227520.0, |
| "o_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.5": { |
| "accuracy": 0.958199224784039, |
| "total_bits": 1673707520.0, |
| "o_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.6": { |
| "accuracy": 0.970316064893268, |
| "total_bits": 1368227840.0, |
| "o_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.7": { |
| "accuracy": 0.968708845321089, |
| "total_bits": 1061437440.0, |
| "o_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.8": { |
| "accuracy": 0.9669827512698248, |
| "total_bits": 1136803840.0, |
| "o_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.9": { |
| "accuracy": 0.9755806907487568, |
| "total_bits": 1061437440.0, |
| "o_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.10": { |
| "accuracy": 0.9769101596029941, |
| "total_bits": 1061437440.0, |
| "o_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.11": { |
| "accuracy": 0.975106158584822, |
| "total_bits": 1061437440.0, |
| "o_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.12": { |
| "accuracy": 0.9743777616822626, |
| "total_bits": 1061437440.0, |
| "o_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.13": { |
| "accuracy": 0.9739925485046115, |
| "total_bits": 1061437440.0, |
| "o_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.14": { |
| "accuracy": 0.9755846043408383, |
| "total_bits": 1061437440.0, |
| "o_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.15": { |
| "accuracy": 0.9762604041316081, |
| "total_bits": 1061437440.0, |
| "o_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.16": { |
| "accuracy": 0.9772902538825292, |
| "total_bits": 1061437440.0, |
| "o_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.17": { |
| "accuracy": 0.9770755698264111, |
| "total_bits": 1061437440.0, |
| "o_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.18": { |
| "accuracy": 0.9760004513082094, |
| "total_bits": 1061437440.0, |
| "o_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.19": { |
| "accuracy": 0.975121202878654, |
| "total_bits": 1061437440.0, |
| "o_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.20": { |
| "accuracy": 0.9728874812135473, |
| "total_bits": 1061437440.0, |
| "o_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.21": { |
| "accuracy": 0.9694525471422821, |
| "total_bits": 1061437440.0, |
| "o_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.22": { |
| "accuracy": 0.9668388536083512, |
| "total_bits": 1061437440.0, |
| "o_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.23": { |
| "accuracy": 0.965998790517915, |
| "total_bits": 1061437440.0, |
| "o_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.24": { |
| "accuracy": 0.9616168590146117, |
| "total_bits": 1061437440.0, |
| "o_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.25": { |
| "accuracy": 0.9592678159242496, |
| "total_bits": 1061437440.0, |
| "o_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.26": { |
| "accuracy": 0.9558643582277, |
| "total_bits": 1061437440.0, |
| "o_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.27": { |
| "accuracy": 0.9620310880127363, |
| "total_bits": 1146060800.0, |
| "o_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.28": { |
| "accuracy": 0.9612902893568389, |
| "total_bits": 1136803840.0, |
| "o_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.29": { |
| "accuracy": 0.9550080706831068, |
| "total_bits": 1061437440.0, |
| "o_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.30": { |
| "accuracy": 0.9559439130243845, |
| "total_bits": 1061437440.0, |
| "o_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.31": { |
| "accuracy": 0.9563879903871566, |
| "total_bits": 1061437440.0, |
| "o_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.32": { |
| "accuracy": 0.9559127941611223, |
| "total_bits": 1061437440.0, |
| "o_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.33": { |
| "accuracy": 0.9582970883930102, |
| "total_bits": 1061437440.0, |
| "o_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.34": { |
| "accuracy": 0.9622450541355647, |
| "total_bits": 1061437440.0, |
| "o_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.35": { |
| "accuracy": 0.9604014182696119, |
| "total_bits": 1061437440.0, |
| "o_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.36": { |
| "accuracy": 0.9555513958912343, |
| "total_bits": 1061437440.0, |
| "o_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.37": { |
| "accuracy": 0.9593434340786189, |
| "total_bits": 1136803840.0, |
| "o_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.38": { |
| "accuracy": 0.9587914213771, |
| "total_bits": 1136803840.0, |
| "o_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.39": { |
| "accuracy": 0.9584287352627143, |
| "total_bits": 1136803840.0, |
| "o_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.40": { |
| "accuracy": 0.9576056672958657, |
| "total_bits": 1146060800.0, |
| "o_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.41": { |
| "accuracy": 0.9578288702759892, |
| "total_bits": 1146060800.0, |
| "o_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.42": { |
| "accuracy": 0.9596846217755228, |
| "total_bits": 1210859520.0, |
| "o_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.43": { |
| "accuracy": 0.9523715777904727, |
| "total_bits": 1451540480.0, |
| "o_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.44": { |
| "accuracy": 0.9620087484945543, |
| "total_bits": 1136803840.0, |
| "o_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.45": { |
| "accuracy": 0.9595809546299279, |
| "total_bits": 1136803840.0, |
| "o_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.46": { |
| "accuracy": 0.9608659410150722, |
| "total_bits": 1146060800.0, |
| "o_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.47": { |
| "accuracy": 0.9577988529345021, |
| "total_bits": 1136803840.0, |
| "o_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.48": { |
| "accuracy": 0.9602391887456179, |
| "total_bits": 1210859520.0, |
| "o_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.49": { |
| "accuracy": 0.9616908680764027, |
| "total_bits": 1368227840.0, |
| "o_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.50": { |
| "accuracy": 0.9638307717395946, |
| "total_bits": 1451540480.0, |
| "o_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.51": { |
| "accuracy": 0.959196690004319, |
| "total_bits": 1451540480.0, |
| "o_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.52": { |
| "accuracy": 0.9582395195029676, |
| "total_bits": 1516339200.0, |
| "o_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.53": { |
| "accuracy": 0.9564991883235052, |
| "total_bits": 1673707520.0, |
| "o_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.54": { |
| "accuracy": 0.9609911639709026, |
| "total_bits": 1673707520.0, |
| "o_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.55": { |
| "accuracy": 0.9612903955276124, |
| "total_bits": 1673707520.0, |
| "o_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.56": { |
| "accuracy": 0.9615092970780097, |
| "total_bits": 1673707520.0, |
| "o_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.57": { |
| "accuracy": 0.9581882330821827, |
| "total_bits": 1653227520.0, |
| "o_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.58": { |
| "accuracy": 0.9577868964988738, |
| "total_bits": 1136803840.0, |
| "o_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "2": 32 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.59": { |
| "accuracy": 0.989684437867254, |
| "total_bits": 1061437440.0, |
| "o_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.60": { |
| "accuracy": 0.9913747301325202, |
| "total_bits": 1061437440.0, |
| "o_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.61": { |
| "accuracy": 0.9891216587275267, |
| "total_bits": 1061437440.0, |
| "o_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.62": { |
| "accuracy": 0.9859952349215746, |
| "total_bits": 1061437440.0, |
| "o_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.63": { |
| "accuracy": 0.9627033434808254, |
| "total_bits": 1061437440.0, |
| "o_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| } |
| } |
| } |
|
|