diff --git "a/quant_strategy.json" "b/quant_strategy.json" new file mode 100644--- /dev/null +++ "b/quant_strategy.json" @@ -0,0 +1,4262 @@ +{ + "measurement": { + "model.layers.0": { + "accuracy": 0.9234118461608887, + "total_bits": 685474112, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + "model.layers.1": { + "accuracy": 0.9103550910949707, + "total_bits": 744980800, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + "model.layers.2": { + "accuracy": 0.9337918758392334, + "total_bits": 1058505024, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.5, + 0.5 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.5, + 0.5 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.5, + 0.5 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.5, + 0.5 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.5, + 0.5 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "model.layers.3": { + "accuracy": 0.9201955795288086, + "total_bits": 676823360, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "model.layers.4": { + "accuracy": 0.9312362670898438, + "total_bits": 665813312, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "model.layers.5": { + "accuracy": 0.9329456090927124, + "total_bits": 744980800, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + "model.layers.6": { + "accuracy": 0.9207742214202881, + "total_bits": 744980800, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + "model.layers.7": { + "accuracy": 0.9351202249526978, + "total_bits": 882344256, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "model.layers.8": { + "accuracy": 0.9291115999221802, + "total_bits": 882344256, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "model.layers.9": { + "accuracy": 0.9229476451873779, + "total_bits": 882344256, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "model.layers.10": { + "accuracy": 0.9183874130249023, + "total_bits": 882344256, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "model.layers.11": { + "accuracy": 0.9176948070526123, + "total_bits": 882344256, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "model.layers.12": { + "accuracy": 0.9242410659790039, + "total_bits": 948404544, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.13": { + "accuracy": 0.9212267398834229, + "total_bits": 948404544, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.14": { + "accuracy": 0.9170515537261963, + "total_bits": 948404544, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.15": { + "accuracy": 0.9178385734558105, + "total_bits": 948404544, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.16": { + "accuracy": 0.9178411960601807, + "total_bits": 948404544, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.17": { + "accuracy": 0.9196867942810059, + "total_bits": 948404544, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.18": { + "accuracy": 0.9236702919006348, + "total_bits": 948404544, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.19": { + "accuracy": 0.9250991344451904, + "total_bits": 948404544, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + "model.layers.20": { + "accuracy": 0.9187812805175781, + "total_bits": 882344256, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "model.layers.21": { + "accuracy": 0.9207375049591064, + "total_bits": 882344256, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "model.layers.22": { + "accuracy": 0.9287240505218506, + "total_bits": 882344256, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "model.layers.23": { + "accuracy": 0.9161067008972168, + "total_bits": 744980800, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + "model.layers.24": { + "accuracy": 0.9204282760620117, + "total_bits": 744980800, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + "model.layers.25": { + "accuracy": 0.9228081703186035, + "total_bits": 744980800, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + "model.layers.26": { + "accuracy": 0.92047119140625, + "total_bits": 694649152, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + "model.layers.27": { + "accuracy": 0.9243440628051758, + "total_bits": 694649152, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.02, + 0.98 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + "model.layers.28": { + "accuracy": 0.9227378368377686, + "total_bits": 676823360, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "model.layers.29": { + "accuracy": 0.9246113300323486, + "total_bits": 676823360, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "model.layers.30": { + "accuracy": 0.9254612922668457, + "total_bits": 676823360, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "model.layers.31": { + "accuracy": 0.927114725112915, + "total_bits": 676823360, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "model.layers.32": { + "accuracy": 0.9271104335784912, + "total_bits": 676823360, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "model.layers.33": { + "accuracy": 0.9283251762390137, + "total_bits": 676823360, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "model.layers.34": { + "accuracy": 0.9261393547058105, + "total_bits": 676823360, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "model.layers.35": { + "accuracy": 0.925363302230835, + "total_bits": 676823360, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "model.layers.36": { + "accuracy": 0.922121524810791, + "total_bits": 673677632, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.09, + 0.91 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.09, + 0.91 + ], + "scale_bits": 4 + } + }, + "model.layers.37": { + "accuracy": 0.9217836856842041, + "total_bits": 676823360, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "model.layers.38": { + "accuracy": 0.9224433898925781, + "total_bits": 673677632, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.09, + 0.91 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.01, + 0.99 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.09, + 0.91 + ], + "scale_bits": 4 + } + }, + "model.layers.39": { + "accuracy": 0.9265820980072021, + "total_bits": 676823360, + "q_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "gate_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "4": 128, + "2": 128 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + } +} \ No newline at end of file